aboutsummaryrefslogtreecommitdiffstats
path: root/backends/cxxrtl
diff options
context:
space:
mode:
Diffstat (limited to 'backends/cxxrtl')
-rw-r--r--backends/cxxrtl/cxxrtl.h45
1 files changed, 26 insertions, 19 deletions
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index c510b33d7..85f45ac7f 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -66,6 +66,7 @@ namespace cxxrtl {
// Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
// clobbered results in simpler generated code.
typedef uint32_t chunk_t;
+typedef uint64_t wide_chunk_t;
template<typename T>
struct chunk_traits {
@@ -376,10 +377,12 @@ struct value : public expr_base<value<Bits>> {
: data[chunks - 1 - n] << (chunk::bits - shift_bits);
}
if (Signed && is_neg()) {
- for (size_t n = chunks - shift_chunks; n < chunks; n++)
+ size_t top_chunk_idx = (Bits - shift_bits) / chunk::bits;
+ size_t top_chunk_bits = (Bits - shift_bits) % chunk::bits;
+ for (size_t n = top_chunk_idx + 1; n < chunks; n++)
result.data[n] = chunk::mask;
if (shift_bits != 0)
- result.data[chunks - shift_chunks] |= chunk::mask << (chunk::bits - shift_bits);
+ result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits;
}
return result;
}
@@ -452,6 +455,24 @@ struct value : public expr_base<value<Bits>> {
bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
}
+
+ template<size_t ResultBits>
+ value<ResultBits> mul(const value<Bits> &other) const {
+ value<ResultBits> result;
+ wide_chunk_t wide_result[result.chunks + 1] = {};
+ for (size_t n = 0; n < chunks; n++) {
+ for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
+ wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
+ wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
+ wide_result[n + m] &= chunk::mask;
+ }
+ }
+ for (size_t n = 0; n < result.chunks; n++) {
+ result.data[n] = wide_result[n];
+ }
+ result.data[result.chunks - 1] &= result.msb_mask;
+ return result;
+ }
};
// Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
@@ -1304,28 +1325,14 @@ value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
- value<BitsY> product;
- value<BitsY> multiplicand = a.template zcast<BitsY>();
- const value<BitsB> &multiplier = b;
- uint32_t multiplicand_shift = 0;
- for (size_t step = 0; step < BitsB; step++) {
- if (multiplier.bit(step)) {
- multiplicand = multiplicand.shl(value<32> { multiplicand_shift });
- product = product.add(multiplicand);
- multiplicand_shift = 0;
- }
- multiplicand_shift++;
- }
- return product;
+ constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
+ return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
}
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
- value<BitsB + 1> ub = b.template sext<BitsB + 1>();
- if (ub.is_neg()) ub = ub.neg();
- value<BitsY> y = mul_uu<BitsY>(a.template scast<BitsY>(), ub);
- return b.is_neg() ? y.neg() : y;
+ return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
}
template<size_t BitsY, size_t BitsA, size_t BitsB>