author     whitequark <whitequark@whitequark.org>   2020-06-15 06:08:17 +0000
committer  GitHub <noreply@github.com>              2020-06-15 06:08:17 +0000
commit     334ec5fa0a90b1816060c67bff72362cbd6483c7 (patch)
tree       9dd04e891180e9041f8c44627314279ba5bc921c
parent     9d0f1aa222972735412793ccb6ee7da609170fab (diff)
parent     f7ae9b0851b96d8691b54f0792b526a18c715a30 (diff)
Merge pull request #2159 from MerryMage/cxxrtl-mul
cxxrtl: Implement chunk-wise multiplication
-rw-r--r--  backends/cxxrtl/cxxrtl.h  39
1 file changed, 22 insertions(+), 17 deletions(-)
```diff
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index 166b4896f..85f45ac7f 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -66,6 +66,7 @@ namespace cxxrtl {
 // Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
 // clobbered results in simpler generated code.
 typedef uint32_t chunk_t;
+typedef uint64_t wide_chunk_t;
 
 template<typename T>
 struct chunk_traits {
@@ -454,6 +455,24 @@ struct value : public expr_base<value<Bits>> {
 		bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
 		return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
 	}
+
+	template<size_t ResultBits>
+	value<ResultBits> mul(const value<Bits> &other) const {
+		value<ResultBits> result;
+		wide_chunk_t wide_result[result.chunks + 1] = {};
+		for (size_t n = 0; n < chunks; n++) {
+			for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
+				wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
+				wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
+				wide_result[n + m] &= chunk::mask;
+			}
+		}
+		for (size_t n = 0; n < result.chunks; n++) {
+			result.data[n] = wide_result[n];
+		}
+		result.data[result.chunks - 1] &= result.msb_mask;
+		return result;
+	}
 };
 
 // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
@@ -1306,28 +1325,14 @@ value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
-	value<BitsY> product;
-	value<BitsY> multiplicand = a.template zcast<BitsY>();
-	const value<BitsB> &multiplier = b;
-	uint32_t multiplicand_shift = 0;
-	for (size_t step = 0; step < BitsB; step++) {
-		if (multiplier.bit(step)) {
-			multiplicand = multiplicand.shl(value<32> { multiplicand_shift });
-			product = product.add(multiplicand);
-			multiplicand_shift = 0;
-		}
-		multiplicand_shift++;
-	}
-	return product;
+	constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
+	return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
-	value<BitsB + 1> ub = b.template sext<BitsB + 1>();
-	if (ub.is_neg()) ub = ub.neg();
-	value<BitsY> y = mul_uu<BitsY>(a.template scast<BitsY>(), ub);
-	return b.is_neg() ? y.neg() : y;
+	return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
```
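The new `value<Bits>::mul()` is schoolbook long multiplication in base 2^32: each pair of 32-bit chunks is multiplied into a 64-bit `wide_chunk_t`, accumulated into the matching column, and the carry is pushed into the next column before the slot is masked back down to 32 bits, so no 64-bit slot can ever overflow. A minimal standalone sketch of the same loop structure (not part of the patch; `mul_chunks` is a hypothetical `std::vector`-based analogue rather than cxxrtl's fixed-size `value<>`):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

using chunk_t      = uint32_t;
using wide_chunk_t = uint64_t;

// Hypothetical standalone analogue of value<Bits>::mul(). Operands are
// little-endian vectors of 32-bit chunks; the product is truncated to
// result_chunks chunks, as mul<ResultBits>() truncates to ResultBits.
std::vector<chunk_t> mul_chunks(const std::vector<chunk_t> &a,
                                const std::vector<chunk_t> &b,
                                size_t result_chunks) {
	// One spare 64-bit slot so the final carry always has somewhere to go.
	std::vector<wide_chunk_t> wide(result_chunks + 1, 0);
	for (size_t n = 0; n < a.size(); n++) {
		for (size_t m = 0; m < b.size() && n + m < result_chunks; m++) {
			// 32x32 -> 64-bit partial product into column n + m, then
			// push the carry into the next column and renormalize this
			// column to 32 bits before it is touched again.
			wide[n + m] += wide_chunk_t(a[n]) * wide_chunk_t(b[m]);
			wide[n + m + 1] += wide[n + m] >> 32;
			wide[n + m] &= 0xffffffff;
		}
	}
	std::vector<chunk_t> result(result_chunks);
	for (size_t n = 0; n < result_chunks; n++)
		result[n] = chunk_t(wide[n]);
	return result;
}

int main() {
	// (2^32 + 1) * 2 = 0x2'00000002, chunked little-endian.
	auto r = mul_chunks({0x00000001, 0x00000001}, {0x00000002}, 3);
	std::printf("%08x_%08x_%08x\n",
	            (unsigned)r[2], (unsigned)r[1], (unsigned)r[0]);
	// prints: 00000000_00000002_00000002
	return 0;
}
```

Note that `mul()` multiplies two values of the same width, which is why `mul_uu` first zero-extends both operands to `BitsM = max(BitsA, BitsB)`. The payoff over the old shift-and-add loop is one 64-bit multiply-accumulate per chunk pair instead of a full-width shift and add per multiplier bit.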
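The `mul_ss` simplification rests on a two's-complement identity worth spelling out: the low `BitsY` bits of a product depend only on the low `BitsY` bits of the operands, so after sign-extending both operands to the result width, a plain truncating unsigned multiply already yields the correct signed product, and the old negate-multiply-negate dance is unnecessary. A tiny standalone check of that identity (again not from the patch, just an illustration at 8-to-16 bits):

```cpp
#include <cassert>
#include <cstdint>

int main() {
	int8_t a = -3, b = 5;                // signed 8-bit operands
	// Sign-extend both operands to the 16-bit result width (scast).
	uint16_t ua = uint16_t(int16_t(a));  // 0xfffd
	uint16_t ub = uint16_t(int16_t(b));  // 0x0005
	// One truncating unsigned multiply, as mul_ss now does via mul<BitsY>().
	uint16_t p = uint16_t(ua * ub);      // 0xfff1, i.e. 65521 mod 2^16
	// Reinterpreted as signed, this is exactly a * b = -15 (the unsigned-to-
	// signed conversion wraps modulo 2^16; well-defined as of C++20).
	assert(int16_t(p) == -15);
	return 0;
}
```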