From b9721bedf01ca1f536bbf13ba761333c6867bd29 Mon Sep 17 00:00:00 2001
From: whitequark <whitequark@whitequark.org>
Date: Mon, 21 Dec 2020 02:15:55 +0000
Subject: cxxrtl: speed up bit repeats (sign extends, etc).

On Minerva SoC SRAM, depending on the compiler, this change improves
overall time by 4-7%.
---
 backends/cxxrtl/cxxrtl.h          |  8 ++++++++
 backends/cxxrtl/cxxrtl_backend.cc | 25 ++++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index 3c315c7df..0a6bcb849 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -317,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
 		return sext_cast<NewBits>()(*this);
 	}
 
+	// Bit replication is far more efficient than the equivalent concatenation.
+	template<size_t Count>
+	CXXRTL_ALWAYS_INLINE
+	value<Bits * Count> repeat() const {
+		static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
+		return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
+	}
+
 	// Operations with run-time parameters (offsets, amounts, etc).
 	//
 	// These operations are used for computations.
diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc
index 3b2fb4985..916303bfe 100644
--- a/backends/cxxrtl/cxxrtl_backend.cc
+++ b/backends/cxxrtl/cxxrtl_backend.cc
@@ -832,11 +832,26 @@ struct CxxrtlWorker {
 		} else if (sig.is_chunk()) {
 			return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug);
 		} else {
-			dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug);
-			for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) {
-				f << ".concat(";
-				dump_sigchunk(*it, is_lhs, for_debug);
-				f << ")";
+			bool first = true;
+			auto chunks = sig.chunks();
+			for (auto it = chunks.rbegin(); it != chunks.rend(); it++) {
+				if (!first)
+					f << ".concat(";
+				bool is_complex = dump_sigchunk(*it, is_lhs, for_debug);
+				if (!is_lhs && it->width == 1) {
+					size_t repeat = 1;
+					while ((it + repeat) != chunks.rend() && *(it + repeat) == *it)
+						repeat++;
+					if (repeat > 1) {
+						if (is_complex)
+							f << ".val()";
+						f << ".repeat<" << repeat << ">()";
+					}
+					it += repeat - 1;
+				}
+				if (!first)
+					f << ")";
+				first = false;
 			}
 			return true;
 		}
-- 
cgit v1.2.3