diff options
Diffstat (limited to 'backends/cxxrtl')
-rw-r--r-- | backends/cxxrtl/cxxrtl.h | 365 | ||||
-rw-r--r-- | backends/cxxrtl/cxxrtl_backend.cc | 498 | ||||
-rw-r--r-- | backends/cxxrtl/cxxrtl_capi.cc | 25 | ||||
-rw-r--r-- | backends/cxxrtl/cxxrtl_capi.h | 129 | ||||
-rw-r--r-- | backends/cxxrtl/cxxrtl_vcd.h | 46 | ||||
-rw-r--r-- | backends/cxxrtl/cxxrtl_vcd_capi.h | 4 |
6 files changed, 805 insertions, 262 deletions
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index c988c9e80..41089a153 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -17,6 +17,11 @@ */ // This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself. +// +// The CXXRTL support library implements compile time specialized arbitrary width arithmetics, as well as provides +// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass +// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler +// to unwrap the abstraction and generate efficient code. #ifndef CXXRTL_H #define CXXRTL_H @@ -35,10 +40,19 @@ #include <backends/cxxrtl/cxxrtl_capi.h> -// The CXXRTL support library implements compile time specialized arbitrary width arithmetics, as well as provides -// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass -// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler -// to unwrap the abstraction and generate efficient code. +// CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector. +// It generates a lot of specialized template functions with relatively large bodies that, when inlined +// into the caller and (for those with loops) unrolled, often expose many new optimization opportunities. +// Because of this, most of the CXXRTL runtime must be always inlined for best performance. +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif +#if __has_attribute(always_inline) +#define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__)) +#else +#define CXXRTL_ALWAYS_INLINE inline +#endif + namespace cxxrtl { // All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. 
The chunk size @@ -52,6 +66,7 @@ namespace cxxrtl { // Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be // clobbered results in simpler generated code. typedef uint32_t chunk_t; +typedef uint64_t wide_chunk_t; template<typename T> struct chunk_traits { @@ -85,6 +100,7 @@ struct value : public expr_base<value<Bits>> { value<Bits> &operator=(const value<Bits> &) = default; // A (no-op) helper that forces the cast to value<>. + CXXRTL_ALWAYS_INLINE const value<Bits> &val() const { return *this; } @@ -95,12 +111,42 @@ struct value : public expr_base<value<Bits>> { return ss.str(); } + // Conversion operations. + // + // These functions ensure that a conversion is never out of range, and should be always used, if at all + // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and + // .concat() can be used to split them into more manageable parts. + template<class IntegerT> + CXXRTL_ALWAYS_INLINE + IntegerT get() const { + static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed, + "get<T>() requires T to be an unsigned integral type"); + static_assert(std::numeric_limits<IntegerT>::digits >= Bits, + "get<T>() requires T to be at least as wide as the value is"); + IntegerT result = 0; + for (size_t n = 0; n < chunks; n++) + result |= IntegerT(data[n]) << (n * chunk::bits); + return result; + } + + template<class IntegerT> + CXXRTL_ALWAYS_INLINE + void set(IntegerT other) { + static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed, + "set<T>() requires T to be an unsigned integral type"); + static_assert(std::numeric_limits<IntegerT>::digits >= Bits, + "set<T>() requires the value to be at least as wide as T is"); + for (size_t n = 0; n < chunks; n++) + data[n] = (other >> (n * chunk::bits)) & chunk::mask; + } + // Operations with compile-time parameters. 
// // These operations are used to implement slicing, concatenation, and blitting. // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left); // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right). template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> trunc() const { static_assert(NewBits <= Bits, "trunc() may not increase width"); value<NewBits> result; @@ -111,6 +157,7 @@ struct value : public expr_base<value<Bits>> { } template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> zext() const { static_assert(NewBits >= Bits, "zext() may not decrease width"); value<NewBits> result; @@ -120,6 +167,7 @@ struct value : public expr_base<value<Bits>> { } template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> sext() const { static_assert(NewBits >= Bits, "sext() may not decrease width"); value<NewBits> result; @@ -135,6 +183,7 @@ struct value : public expr_base<value<Bits>> { } template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> rtrunc() const { static_assert(NewBits <= Bits, "rtrunc() may not increase width"); value<NewBits> result; @@ -154,6 +203,7 @@ struct value : public expr_base<value<Bits>> { } template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> rzext() const { static_assert(NewBits >= Bits, "rzext() may not decrease width"); value<NewBits> result; @@ -165,13 +215,14 @@ struct value : public expr_base<value<Bits>> { carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } - if (carry != 0) - result.data[result.chunks - 1] = carry; + if (shift_chunks + chunks < result.chunks) + result.data[shift_chunks + chunks] = carry; return result; } // Bit blit operation, i.e. a partial read-modify-write. 
template<size_t Stop, size_t Start> + CXXRTL_ALWAYS_INLINE value<Bits> blit(const value<Stop - Start + 1> &source) const { static_assert(Stop >= Start, "blit() may not reverse bit order"); constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits)); @@ -196,6 +247,7 @@ struct value : public expr_base<value<Bits>> { // than the operand. In C++17 these can be replaced with `if constexpr`. template<size_t NewBits, typename = void> struct zext_cast { + CXXRTL_ALWAYS_INLINE value<NewBits> operator()(const value<Bits> &val) { return val.template zext<NewBits>(); } @@ -203,6 +255,7 @@ struct value : public expr_base<value<Bits>> { template<size_t NewBits> struct zext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> { + CXXRTL_ALWAYS_INLINE value<NewBits> operator()(const value<Bits> &val) { return val.template trunc<NewBits>(); } @@ -210,6 +263,7 @@ struct value : public expr_base<value<Bits>> { template<size_t NewBits, typename = void> struct sext_cast { + CXXRTL_ALWAYS_INLINE value<NewBits> operator()(const value<Bits> &val) { return val.template sext<NewBits>(); } @@ -217,17 +271,20 @@ struct value : public expr_base<value<Bits>> { template<size_t NewBits> struct sext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> { + CXXRTL_ALWAYS_INLINE value<NewBits> operator()(const value<Bits> &val) { return val.template trunc<NewBits>(); } }; template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> zcast() const { return zext_cast<NewBits>()(*this); } template<size_t NewBits> + CXXRTL_ALWAYS_INLINE value<NewBits> scast() const { return sext_cast<NewBits>()(*this); } @@ -246,6 +303,10 @@ struct value : public expr_base<value<Bits>> { data[offset_chunks] |= value ? 
1 << offset_bits : 0; } + explicit operator bool() const { + return !is_zero(); + } + bool is_zero() const { for (size_t n = 0; n < chunks; n++) if (data[n] != 0) @@ -253,10 +314,6 @@ struct value : public expr_base<value<Bits>> { return true; } - explicit operator bool() const { - return !is_zero(); - } - bool is_neg() const { return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits)); } @@ -349,10 +406,12 @@ struct value : public expr_base<value<Bits>> { : data[chunks - 1 - n] << (chunk::bits - shift_bits); } if (Signed && is_neg()) { - for (size_t n = chunks - shift_chunks; n < chunks; n++) + size_t top_chunk_idx = (Bits - shift_bits) / chunk::bits; + size_t top_chunk_bits = (Bits - shift_bits) % chunk::bits; + for (size_t n = top_chunk_idx + 1; n < chunks; n++) result.data[n] = chunk::mask; if (shift_bits != 0) - result.data[chunks - shift_chunks] |= chunk::mask << (chunk::bits - shift_bits); + result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits; } return result; } @@ -393,10 +452,11 @@ struct value : public expr_base<value<Bits>> { bool carry = CarryIn; for (size_t n = 0; n < result.chunks; n++) { result.data[n] = data[n] + (Invert ? 
~other.data[n] : other.data[n]) + carry; + if (result.chunks - 1 == n) + result.data[result.chunks - 1] &= result.msb_mask; carry = (result.data[n] < data[n]) || (result.data[n] == data[n] && carry); } - result.data[result.chunks - 1] &= result.msb_mask; return {result, carry}; } @@ -425,6 +485,24 @@ struct value : public expr_base<value<Bits>> { bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg()); return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b } + + template<size_t ResultBits> + value<ResultBits> mul(const value<Bits> &other) const { + value<ResultBits> result; + wide_chunk_t wide_result[result.chunks + 1] = {}; + for (size_t n = 0; n < chunks; n++) { + for (size_t m = 0; m < chunks && n + m < result.chunks; m++) { + wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]); + wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits; + wide_result[n + m] &= chunk::mask; + } + } + for (size_t n = 0; n < result.chunks; n++) { + result.data[n] = wide_result[n]; + } + result.data[result.chunks - 1] &= result.msb_mask; + return result; + } }; // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here. @@ -439,12 +517,14 @@ struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> { slice_expr(T &expr) : expr(expr) {} slice_expr(const slice_expr<T, Stop, Start> &) = delete; + CXXRTL_ALWAYS_INLINE operator value<bits>() const { return static_cast<const value<T::bits> &>(expr) .template rtrunc<T::bits - Start>() .template trunc<bits>(); } + CXXRTL_ALWAYS_INLINE slice_expr<T, Stop, Start> &operator=(const value<bits> &rhs) { // Generic partial assignment implemented using a read-modify-write operation on the sliced expression. expr = static_cast<const value<T::bits> &>(expr) @@ -453,6 +533,7 @@ struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> { } // A helper that forces the cast to value<>, which allows deduction to work. 
+ CXXRTL_ALWAYS_INLINE value<bits> val() const { return static_cast<const value<bits> &>(*this); } @@ -469,6 +550,7 @@ struct concat_expr : public expr_base<concat_expr<T, U>> { concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {} concat_expr(const concat_expr<T, U> &) = delete; + CXXRTL_ALWAYS_INLINE operator value<bits>() const { value<bits> ms_shifted = static_cast<const value<T::bits> &>(ms_expr) .template rzext<bits>(); @@ -477,6 +559,7 @@ struct concat_expr : public expr_base<concat_expr<T, U>> { return ms_shifted.bit_or(ls_extended); } + CXXRTL_ALWAYS_INLINE concat_expr<T, U> &operator=(const value<bits> &rhs) { ms_expr = rhs.template rtrunc<T::bits>(); ls_expr = rhs.template trunc<U::bits>(); @@ -484,6 +567,7 @@ struct concat_expr : public expr_base<concat_expr<T, U>> { } // A helper that forces the cast to value<>, which allows deduction to work. + CXXRTL_ALWAYS_INLINE value<bits> val() const { return static_cast<const value<bits> &>(*this); } @@ -508,21 +592,25 @@ struct concat_expr : public expr_base<concat_expr<T, U>> { template<class T> struct expr_base { template<size_t Stop, size_t Start = Stop> + CXXRTL_ALWAYS_INLINE slice_expr<const T, Stop, Start> slice() const { return {*static_cast<const T *>(this)}; } template<size_t Stop, size_t Start = Stop> + CXXRTL_ALWAYS_INLINE slice_expr<T, Stop, Start> slice() { return {*static_cast<T *>(this)}; } template<class U> + CXXRTL_ALWAYS_INLINE concat_expr<const T, typename std::remove_reference<const U>::type> concat(const U &other) const { return {*static_cast<const T *>(this), other}; } template<class U> + CXXRTL_ALWAYS_INLINE concat_expr<T, typename std::remove_reference<U>::type> concat(U &&other) { return {*static_cast<T *>(this), other}; } @@ -563,6 +651,18 @@ struct wire { wire(wire<Bits> &&) = default; wire<Bits> &operator=(const wire<Bits> &) = delete; + template<class IntegerT> + CXXRTL_ALWAYS_INLINE + IntegerT get() const { + return curr.template get<IntegerT>(); + } + + 
template<class IntegerT> + CXXRTL_ALWAYS_INLINE + void set(IntegerT other) { + next.template set<IntegerT>(other); + } + bool commit() { if (curr != next) { curr = next; @@ -608,6 +708,7 @@ struct memory { // This utterly reprehensible construct is the most reasonable way to apply a function to every element // of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list. auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...}; + (void)_; } // An operator for direct memory reads. May be used at any time during the simulation. @@ -676,10 +777,8 @@ struct metadata { // In debug mode, using the wrong .as_*() function will assert. // In release mode, using the wrong .as_*() function will safely return a default value. - union { - const unsigned uint_value = 0; - const signed sint_value; - }; + const unsigned uint_value = 0; + const signed sint_value = 0; const std::string string_value = ""; const double double_value = 0.0; @@ -716,68 +815,155 @@ struct metadata { typedef std::map<std::string, metadata> metadata_map; +// Helper class to disambiguate values/wires and their aliases. +struct debug_alias {}; + // This structure is intended for consumption via foreign function interfaces, like Python's ctypes. // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++. // // To avoid violating strict aliasing rules, this structure has to be a subclass of the one used // in the C API, or it would not be possible to cast between the pointers to these. struct debug_item : ::cxxrtl_object { + // Object types. enum : uint32_t { VALUE = CXXRTL_VALUE, WIRE = CXXRTL_WIRE, MEMORY = CXXRTL_MEMORY, + ALIAS = CXXRTL_ALIAS, + }; + + // Object flags. 
+ enum : uint32_t { + INPUT = CXXRTL_INPUT, + OUTPUT = CXXRTL_OUTPUT, + INOUT = CXXRTL_INOUT, + DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC, + DRIVEN_COMB = CXXRTL_DRIVEN_COMB, + UNDRIVEN = CXXRTL_UNDRIVEN, }; debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {} template<size_t Bits> - debug_item(value<Bits> &item) { + debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) { static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t), "value<Bits> is not compatible with C layout"); - type = VALUE; - width = Bits; - depth = 1; - curr = item.data; - next = item.data; + type = VALUE; + flags = flags_; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = item.data; + next = item.data; } template<size_t Bits> - debug_item(const value<Bits> &item) { + debug_item(const value<Bits> &item, size_t lsb_offset = 0) { static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t), "value<Bits> is not compatible with C layout"); - type = VALUE; - width = Bits; - depth = 1; - curr = const_cast<uint32_t*>(item.data); - next = nullptr; + type = VALUE; + flags = DRIVEN_COMB; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = const_cast<chunk_t*>(item.data); + next = nullptr; } template<size_t Bits> - debug_item(wire<Bits> &item) { + debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) { static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) && sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t), "wire<Bits> is not compatible with C layout"); - type = WIRE; - width = Bits; - depth = 1; - curr = item.curr.data; - next = item.next.data; + type = WIRE; + flags = flags_; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = item.curr.data; + next = item.next.data; } template<size_t Width> - debug_item(memory<Width> &item) { + debug_item(memory<Width> &item, size_t zero_offset = 0) { static_assert(sizeof(item.data[0]) == 
value<Width>::chunks * sizeof(chunk_t), "memory<Width> is not compatible with C layout"); - type = MEMORY; - width = Width; - depth = item.data.size(); - curr = item.data.empty() ? nullptr : item.data[0].data; - next = nullptr; + type = MEMORY; + flags = 0; + width = Width; + lsb_at = 0; + depth = item.data.size(); + zero_at = zero_offset; + curr = item.data.empty() ? nullptr : item.data[0].data; + next = nullptr; + } + + template<size_t Bits> + debug_item(debug_alias, const value<Bits> &item, size_t lsb_offset = 0) { + static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t), + "value<Bits> is not compatible with C layout"); + type = ALIAS; + flags = DRIVEN_COMB; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = const_cast<chunk_t*>(item.data); + next = nullptr; + } + + template<size_t Bits> + debug_item(debug_alias, const wire<Bits> &item, size_t lsb_offset = 0) { + static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) && + sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t), + "wire<Bits> is not compatible with C layout"); + type = ALIAS; + flags = DRIVEN_COMB; + width = Bits; + lsb_at = lsb_offset; + depth = 1; + zero_at = 0; + curr = const_cast<chunk_t*>(item.curr.data); + next = nullptr; } }; static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout"); -typedef std::map<std::string, debug_item> debug_items; +struct debug_items { + std::map<std::string, std::vector<debug_item>> table; + + void add(const std::string &name, debug_item &&item) { + std::vector<debug_item> &parts = table[name]; + parts.emplace_back(item); + std::sort(parts.begin(), parts.end(), + [](const debug_item &a, const debug_item &b) { + return a.lsb_at < b.lsb_at; + }); + } + + size_t count(const std::string &name) const { + if (table.count(name) == 0) + return 0; + return table.at(name).size(); + } + + const std::vector<debug_item> &parts_at(const std::string &name) const { + return 
table.at(name); + } + + const debug_item &at(const std::string &name) const { + const std::vector<debug_item> &parts = table.at(name); + assert(parts.size() == 1); + return parts.at(0); + } + + const debug_item &operator [](const std::string &name) const { + return at(name); + } +}; struct module { module() {} @@ -799,7 +985,9 @@ struct module { return deltas; } - virtual void debug_info(debug_items &items, std::string path = "") {} + virtual void debug_info(debug_items &items, std::string path = "") { + (void)items, (void)path; + } }; } // namespace cxxrtl @@ -823,271 +1011,322 @@ using namespace cxxrtl; // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own. template<class T> +CXXRTL_ALWAYS_INLINE constexpr T max(const T &a, const T &b) { return a > b ? a : b; } // Logic operations template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> logic_not(const value<BitsA> &a) { return value<BitsY> { a ? 0u : 1u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> logic_and(const value<BitsA> &a, const value<BitsB> &b) { - return value<BitsY> { (bool(a) & bool(b)) ? 1u : 0u }; + return value<BitsY> { (bool(a) && bool(b)) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> logic_or(const value<BitsA> &a, const value<BitsB> &b) { - return value<BitsY> { (bool(a) | bool(b)) ? 1u : 0u }; + return value<BitsY> { (bool(a) || bool(b)) ? 1u : 0u }; } // Reduction operations template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> reduce_and(const value<BitsA> &a) { return value<BitsY> { a.bit_not().is_zero() ? 1u : 0u }; } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> reduce_or(const value<BitsA> &a) { return value<BitsY> { a ? 1u : 0u }; } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> reduce_xor(const value<BitsA> &a) { return value<BitsY> { (a.ctpop() % 2) ? 
1u : 0u }; } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> reduce_xnor(const value<BitsA> &a) { return value<BitsY> { (a.ctpop() % 2) ? 0u : 1u }; } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> reduce_bool(const value<BitsA> &a) { return value<BitsY> { a ? 1u : 0u }; } // Bitwise operations template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> not_u(const value<BitsA> &a) { return a.template zcast<BitsY>().bit_not(); } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> not_s(const value<BitsA> &a) { return a.template scast<BitsY>().bit_not(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> and_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().bit_and(b.template zcast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> and_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().bit_and(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> or_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().bit_or(b.template zcast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> or_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().bit_or(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> xor_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> xor_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> 
xnor_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()).bit_not(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()).bit_not(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().template shl(b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().template shl(b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().template shl(b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().template shl(b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template shr(b).template zcast<BitsY>(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) { return a.template shr(b).template scast<BitsY>(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template shr(b).template zcast<BitsY>(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) { return a.template sshr(b).template scast<BitsY>(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> 
shift_uu(const value<BitsA> &a, const value<BitsB> &b) { return shr_uu<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shift_su(const value<BitsA> &a, const value<BitsB> &b) { return shr_su<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shift_us(const value<BitsA> &a, const value<BitsB> &b) { return b.is_neg() ? shl_uu<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_uu<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shift_ss(const value<BitsA> &a, const value<BitsB> &b) { return b.is_neg() ? shl_su<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_su<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shiftx_uu(const value<BitsA> &a, const value<BitsB> &b) { return shift_uu<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shiftx_su(const value<BitsA> &a, const value<BitsB> &b) { return shift_su<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shiftx_us(const value<BitsA> &a, const value<BitsB> &b) { return shift_us<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> shiftx_ss(const value<BitsA> &a, const value<BitsB> &b) { return shift_ss<BitsY>(a, b); } // Comparison operations template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> eq_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY>{ a.template zext<BitsExt>() == b.template zext<BitsExt>() ? 
1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> eq_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY>{ a.template sext<BitsExt>() == b.template sext<BitsExt>() ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> ne_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY>{ a.template zext<BitsExt>() != b.template zext<BitsExt>() ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> ne_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY>{ a.template sext<BitsExt>() != b.template sext<BitsExt>() ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> eqx_uu(const value<BitsA> &a, const value<BitsB> &b) { return eq_uu<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> eqx_ss(const value<BitsA> &a, const value<BitsB> &b) { return eq_ss<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> nex_uu(const value<BitsA> &a, const value<BitsB> &b) { return ne_uu<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> nex_ss(const value<BitsA> &a, const value<BitsB> &b) { return ne_ss<BitsY>(a, b); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> gt_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 
1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> gt_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> ge_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { !a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> ge_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { !a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> lt_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> lt_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> le_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { !b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u }; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t BitsExt = max(BitsA, BitsB); return value<BitsY> { !b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 
1u : 0u }; @@ -1095,71 +1334,68 @@ value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) { // Arithmetic operations template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> pos_u(const value<BitsA> &a) { return a.template zcast<BitsY>(); } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> pos_s(const value<BitsA> &a) { return a.template scast<BitsY>(); } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> neg_u(const value<BitsA> &a) { return a.template zcast<BitsY>().neg(); } template<size_t BitsY, size_t BitsA> +CXXRTL_ALWAYS_INLINE value<BitsY> neg_s(const value<BitsA> &a) { return a.template scast<BitsY>().neg(); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> add_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().add(b.template zcast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> add_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().add(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sub_uu(const value<BitsA> &a, const value<BitsB> &b) { return a.template zcast<BitsY>().sub(b.template zcast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) { return a.template scast<BitsY>().sub(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) { - value<BitsY> product; - value<BitsY> multiplicand = a.template zcast<BitsY>(); - const value<BitsB> &multiplier = b; - uint32_t multiplicand_shift = 0; - for (size_t step = 0; step < BitsB; step++) { - if (multiplier.bit(step)) { - multiplicand = multiplicand.shl(value<32> { multiplicand_shift }); - product = 
product.add(multiplicand); - multiplicand_shift = 0; - } - multiplicand_shift++; - } - return product; + constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB; + return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) { - value<BitsB + 1> ub = b.template sext<BitsB + 1>(); - if (ub.is_neg()) ub = ub.neg(); - value<BitsY> y = mul_uu<BitsY>(a.template scast<BitsY>(), ub); - return b.is_neg() ? y.neg() : y; + return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>()); } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const value<BitsB> &b) { constexpr size_t Bits = max(BitsY, max(BitsA, BitsB)); value<Bits> quotient; @@ -1181,6 +1417,7 @@ std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const val } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const value<BitsB> &b) { value<BitsA + 1> ua = a.template sext<BitsA + 1>(); value<BitsB + 1> ub = b.template sext<BitsB + 1>(); @@ -1194,21 +1431,25 @@ std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const val } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> div_uu(const value<BitsA> &a, const value<BitsB> &b) { return divmod_uu<BitsY>(a, b).first; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> div_ss(const value<BitsA> &a, const value<BitsB> &b) { return divmod_ss<BitsY>(a, b).first; } template<size_t BitsY, size_t BitsA, size_t BitsB> +CXXRTL_ALWAYS_INLINE value<BitsY> mod_uu(const value<BitsA> &a, const value<BitsB> &b) { return divmod_uu<BitsY>(a, b).second; } template<size_t BitsY, size_t BitsA, size_t BitsB> 
+CXXRTL_ALWAYS_INLINE value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) { return divmod_ss<BitsY>(a, b).second; } diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 4c04a2f14..a48ea5b23 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -22,6 +22,7 @@ #include "kernel/sigtools.h" #include "kernel/utils.h" #include "kernel/celltypes.h" +#include "kernel/mem.h" #include "kernel/log.h" USING_YOSYS_NAMESPACE @@ -171,11 +172,6 @@ struct Scheduler { } }; -bool is_input_wire(const RTLIL::Wire *wire) -{ - return wire->port_input && !wire->port_output; -} - bool is_unary_cell(RTLIL::IdString type) { return type.in( @@ -202,19 +198,15 @@ bool is_extending_cell(RTLIL::IdString type) bool is_elidable_cell(RTLIL::IdString type) { return is_unary_cell(type) || is_binary_cell(type) || type.in( - ID($mux), ID($concat), ID($slice)); -} - -bool is_sync_ff_cell(RTLIL::IdString type) -{ - return type.in( - ID($dff), ID($dffe)); + ID($mux), ID($concat), ID($slice), ID($pmux)); } bool is_ff_cell(RTLIL::IdString type) { - return is_sync_ff_cell(type) || type.in( - ID($adff), ID($dffsr), ID($dlatch), ID($dlatchsr), ID($sr)); + return type.in( + ID($dff), ID($dffe), ID($sdff), ID($sdffe), ID($sdffce), + ID($adff), ID($adffe), ID($dffsr), ID($dffsre), + ID($dlatch), ID($adlatch), ID($dlatchsr), ID($sr)); } bool is_internal_cell(RTLIL::IdString type) @@ -282,6 +274,7 @@ struct FlowGraph { std::vector<Node*> nodes; dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses; dict<const RTLIL::Wire*, bool> wire_def_elidable, wire_use_elidable; + dict<RTLIL::SigBit, bool> bit_has_state; ~FlowGraph() { @@ -289,17 +282,24 @@ struct FlowGraph { delete node; } - void add_defs(Node *node, const RTLIL::SigSpec &sig, bool fully_sync, bool elidable) + void add_defs(Node *node, const RTLIL::SigSpec &sig, bool is_ff, bool elidable) { for (auto chunk : sig.chunks()) if 
(chunk.wire) { - if (fully_sync) + if (is_ff) { + // A sync def means that a wire holds design state because it is driven directly by + // a flip-flop output. Such a wire can never be unbuffered. wire_sync_defs[chunk.wire].insert(node); - else + } else { + // A comb def means that a wire doesn't hold design state. It might still be connected, + // indirectly, to a flip-flop output. wire_comb_defs[chunk.wire].insert(node); + } } + for (auto bit : sig.bits()) + bit_has_state[bit] |= is_ff; // Only comb defs of an entire wire in the right order can be elided. - if (!fully_sync && sig.is_wire()) + if (!is_ff && sig.is_wire()) wire_def_elidable[sig.as_wire()] = elidable; } @@ -327,7 +327,7 @@ struct FlowGraph { // Connections void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn) { - add_defs(node, conn.first, /*fully_sync=*/false, /*elidable=*/true); + add_defs(node, conn.first, /*is_ff=*/false, /*elidable=*/true); add_uses(node, conn.second); } @@ -374,7 +374,7 @@ struct FlowGraph { if (cell->output(conn.first)) if (is_cxxrtl_sync_port(cell, conn.first)) { // See note regarding elidability below. 
- add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); } } @@ -383,18 +383,18 @@ struct FlowGraph { for (auto conn : cell->connections()) { if (cell->output(conn.first)) { if (is_elidable_cell(cell->type)) - add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/true); - else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) - add_defs(node, conn.second, /*fully_sync=*/true, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/true); + else if (is_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) + add_defs(node, conn.second, /*is_ff=*/true, /*elidable=*/false); else if (is_internal_cell(cell->type)) - add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); else if (!is_cxxrtl_sync_port(cell, conn.first)) { // Although at first it looks like outputs of user-defined cells may always be elided, the reality is // more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb // outputs are assigned in a different way depending on whether the cell's eval() immediately converged. // Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify // the infrastructure required to elide outputs of cells with many of them. 
- add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); + add_defs(node, conn.second, /*is_ff=*/false, /*elidable=*/false); } } if (cell->input(conn.first)) @@ -432,7 +432,7 @@ struct FlowGraph { void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_) { for (auto &action : case_->actions) { - add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/false, /*elidable=*/false); add_uses(node, action.second); } for (auto sub_switch : case_->switches) { @@ -451,9 +451,9 @@ struct FlowGraph { for (auto sync : process->syncs) for (auto action : sync->actions) { if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe) - add_defs(node, action.first, /*is_sync=*/true, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/true, /*elidable=*/false); else - add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); + add_defs(node, action.first, /*is_ff=*/false, /*elidable=*/false); add_uses(node, action.second); } } @@ -527,12 +527,16 @@ struct CxxrtlWorker { std::ostream *impl_f = nullptr; std::ostream *intf_f = nullptr; - bool elide_internal = false; - bool elide_public = false; + bool run_hierarchy = false; + bool run_flatten = false; + bool run_proc = false; + + bool unbuffer_internal = false; + bool unbuffer_public = false; bool localize_internal = false; bool localize_public = false; - bool run_proc_flatten = false; - bool max_opt_level = false; + bool elide_internal = false; + bool elide_public = false; bool debug_info = false; @@ -547,9 +551,11 @@ struct CxxrtlWorker { dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for; dict<const RTLIL::Wire*, FlowGraph::Node> elided_wires; dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule; + pool<const RTLIL::Wire*> unbuffered_wires; pool<const RTLIL::Wire*> localized_wires; dict<const RTLIL::Wire*, const RTLIL::Wire*> debug_alias_wires; dict<const RTLIL::Wire*, RTLIL::Const> 
debug_const_wires; + dict<RTLIL::SigBit, bool> bit_has_state; dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations; dict<const RTLIL::Module*, bool> eval_converges; @@ -786,7 +792,8 @@ struct CxxrtlWorker { dump_const(chunk.data, chunk.width, chunk.offset); return false; } else { - if (!is_lhs && elided_wires.count(chunk.wire)) { + if (elided_wires.count(chunk.wire)) { + log_assert(!is_lhs); const FlowGraph::Node &node = elided_wires[chunk.wire]; switch (node.type) { case FlowGraph::Node::Type::CONNECT: @@ -799,7 +806,7 @@ struct CxxrtlWorker { default: log_assert(false); } - } else if (localized_wires[chunk.wire] || is_input_wire(chunk.wire)) { + } else if (unbuffered_wires[chunk.wire]) { f << mangle(chunk.wire); } else { f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr"); @@ -942,6 +949,21 @@ struct CxxrtlWorker { f << " : "; dump_sigspec_rhs(cell->getPort(ID::A)); f << ")"; + // Parallel (one-hot) muxes + } else if (cell->type == ID($pmux)) { + int width = cell->getParam(ID::WIDTH).as_int(); + int s_width = cell->getParam(ID::S_WIDTH).as_int(); + for (int part = 0; part < s_width; part++) { + f << "("; + dump_sigspec_rhs(cell->getPort(ID::S).extract(part)); + f << " ? "; + dump_sigspec_rhs(cell->getPort(ID::B).extract(part * width, width)); + f << " : "; + } + dump_sigspec_rhs(cell->getPort(ID::A)); + for (int part = 0; part < s_width; part++) { + f << ")"; + } // Concats } else if (cell->type == ID($concat)) { dump_sigspec_rhs(cell->getPort(ID::B)); @@ -1008,35 +1030,6 @@ struct CxxrtlWorker { f << " = "; dump_cell_elided(cell); f << ";\n"; - // Parallel (one-hot) muxes - } else if (cell->type == ID($pmux)) { - int width = cell->getParam(ID::WIDTH).as_int(); - int s_width = cell->getParam(ID::S_WIDTH).as_int(); - bool first = true; - for (int part = 0; part < s_width; part++) { - f << (first ? 
indent : " else "); - first = false; - f << "if ("; - dump_sigspec_rhs(cell->getPort(ID::S).extract(part)); - f << ") {\n"; - inc_indent(); - f << indent; - dump_sigspec_lhs(cell->getPort(ID::Y)); - f << " = "; - dump_sigspec_rhs(cell->getPort(ID::B).extract(part * width, width)); - f << ";\n"; - dec_indent(); - f << indent << "}"; - } - f << " else {\n"; - inc_indent(); - f << indent; - dump_sigspec_lhs(cell->getPort(ID::Y)); - f << " = "; - dump_sigspec_rhs(cell->getPort(ID::A)); - f << ";\n"; - dec_indent(); - f << indent << "}\n"; // Flip-flops } else if (is_ff_cell(cell->type)) { if (cell->hasPort(ID::CLK) && cell->getPort(ID::CLK).is_wire()) { @@ -1046,7 +1039,7 @@ struct CxxrtlWorker { f << indent << "if (" << (cell->getParam(ID::CLK_POLARITY).as_bool() ? "posedge_" : "negedge_") << mangle(clk_bit) << ") {\n"; inc_indent(); - if (cell->type == ID($dffe)) { + if (cell->hasPort(ID::EN)) { f << indent << "if ("; dump_sigspec_rhs(cell->getPort(ID::EN)); f << " == value<1> {" << cell->getParam(ID::EN_POLARITY).as_bool() << "u}) {\n"; @@ -1057,7 +1050,24 @@ struct CxxrtlWorker { f << " = "; dump_sigspec_rhs(cell->getPort(ID::D)); f << ";\n"; - if (cell->type == ID($dffe)) { + if (cell->hasPort(ID::EN) && cell->type != ID($sdffce)) { + dec_indent(); + f << indent << "}\n"; + } + if (cell->hasPort(ID::SRST)) { + f << indent << "if ("; + dump_sigspec_rhs(cell->getPort(ID::SRST)); + f << " == value<1> {" << cell->getParam(ID::SRST_POLARITY).as_bool() << "u}) {\n"; + inc_indent(); + f << indent; + dump_sigspec_lhs(cell->getPort(ID::Q)); + f << " = "; + dump_const(cell->getParam(ID::SRST_VALUE)); + f << ";\n"; + dec_indent(); + f << indent << "}\n"; + } + if (cell->hasPort(ID::EN) && cell->type == ID($sdffce)) { dec_indent(); f << indent << "}\n"; } @@ -1139,7 +1149,7 @@ struct CxxrtlWorker { } // The generated code has two bounds checks; one in an assertion, and another that guards the read. 
// This is done so that the code does not invoke undefined behavior under any conditions, but nevertheless - // loudly crashes if an illegal condition is encountered. The assert may be turned off with -NDEBUG not + // loudly crashes if an illegal condition is encountered. The assert may be turned off with -DNDEBUG not // just for release builds, but also to make sure the simulator (which is presumably embedded in some // larger program) will never crash the code that calls into it. // @@ -1148,31 +1158,33 @@ struct CxxrtlWorker { f << indent << "if(" << valid_index_temp << ".valid) {\n"; inc_indent(); if (writable_memories[memory]) { - std::string addr_temp = fresh_temporary(); - f << indent << "const value<" << cell->getPort(ID::ADDR).size() << "> &" << addr_temp << " = "; - dump_sigspec_rhs(cell->getPort(ID::ADDR)); - f << ";\n"; std::string lhs_temp = fresh_temporary(); f << indent << "value<" << memory->width << "> " << lhs_temp << " = " << mangle(memory) << "[" << valid_index_temp << ".index];\n"; std::vector<const RTLIL::Cell*> memwr_cells(transparent_for[cell].begin(), transparent_for[cell].end()); - std::sort(memwr_cells.begin(), memwr_cells.end(), - [](const RTLIL::Cell *a, const RTLIL::Cell *b) { - return a->getParam(ID::PRIORITY).as_int() < b->getParam(ID::PRIORITY).as_int(); - }); - for (auto memwr_cell : memwr_cells) { - f << indent << "if (" << addr_temp << " == "; - dump_sigspec_rhs(memwr_cell->getPort(ID::ADDR)); - f << ") {\n"; - inc_indent(); - f << indent << lhs_temp << " = " << lhs_temp; - f << ".update("; - dump_sigspec_rhs(memwr_cell->getPort(ID::DATA)); - f << ", "; - dump_sigspec_rhs(memwr_cell->getPort(ID::EN)); - f << ");\n"; - dec_indent(); - f << indent << "}\n"; + if (!memwr_cells.empty()) { + std::string addr_temp = fresh_temporary(); + f << indent << "const value<" << cell->getPort(ID::ADDR).size() << "> &" << addr_temp << " = "; + dump_sigspec_rhs(cell->getPort(ID::ADDR)); + f << ";\n"; + std::sort(memwr_cells.begin(), 
memwr_cells.end(), + [](const RTLIL::Cell *a, const RTLIL::Cell *b) { + return a->getParam(ID::PRIORITY).as_int() < b->getParam(ID::PRIORITY).as_int(); + }); + for (auto memwr_cell : memwr_cells) { + f << indent << "if (" << addr_temp << " == "; + dump_sigspec_rhs(memwr_cell->getPort(ID::ADDR)); + f << ") {\n"; + inc_indent(); + f << indent << lhs_temp << " = " << lhs_temp; + f << ".update("; + dump_sigspec_rhs(memwr_cell->getPort(ID::DATA)); + f << ", "; + dump_sigspec_rhs(memwr_cell->getPort(ID::EN)); + f << ");\n"; + dec_indent(); + f << indent << "}\n"; + } } f << indent; dump_sigspec_lhs(cell->getPort(ID::DATA)); @@ -1434,13 +1446,12 @@ struct CxxrtlWorker { { if (elided_wires.count(wire)) return; - if (localized_wires.count(wire) != is_local_context) - return; - if (is_local_context) { + if (localized_wires[wire] && is_local_context) { dump_attrs(wire); f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n"; - } else { + } + if (!localized_wires[wire] && !is_local_context) { std::string width; if (wire->module->has_attribute(ID(cxxrtl_blackbox)) && wire->has_attribute(ID(cxxrtl_width))) { width = wire->get_string_attribute(ID(cxxrtl_width)); @@ -1449,14 +1460,21 @@ struct CxxrtlWorker { } dump_attrs(wire); - f << indent << (is_input_wire(wire) ? "value" : "wire") << "<" << width << "> " << mangle(wire); + f << indent; + if (wire->port_input && wire->port_output) + f << "/*inout*/ "; + else if (wire->port_input) + f << "/*input*/ "; + else if (wire->port_output) + f << "/*output*/ "; + f << (unbuffered_wires[wire] ? 
"value" : "wire") << "<" << width << "> " << mangle(wire); if (wire->has_attribute(ID::init)) { f << " "; dump_const_init(wire->attributes.at(ID::init)); } f << ";\n"; if (edge_wires[wire]) { - if (is_input_wire(wire)) { + if (unbuffered_wires[wire]) { f << indent << "value<" << width << "> prev_" << mangle(wire); if (wire->has_attribute(ID::init)) { f << " "; @@ -1467,7 +1485,7 @@ struct CxxrtlWorker { for (auto edge_type : edge_types) { if (edge_type.first.wire == wire) { std::string prev, next; - if (is_input_wire(wire)) { + if (unbuffered_wires[wire]) { prev = "prev_" + mangle(edge_type.first.wire); next = mangle(edge_type.first.wire); } else { @@ -1590,9 +1608,9 @@ struct CxxrtlWorker { inc_indent(); f << indent << "bool changed = false;\n"; for (auto wire : module->wires()) { - if (elided_wires.count(wire) || localized_wires.count(wire)) + if (elided_wires.count(wire)) continue; - if (is_input_wire(wire)) { + if (unbuffered_wires[wire]) { if (edge_wires[wire]) f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n"; continue; @@ -1619,57 +1637,122 @@ struct CxxrtlWorker { void dump_debug_info_method(RTLIL::Module *module) { + size_t count_public_wires = 0; size_t count_const_wires = 0; size_t count_alias_wires = 0; size_t count_member_wires = 0; size_t count_skipped_wires = 0; + size_t count_driven_sync = 0; + size_t count_driven_comb = 0; + size_t count_undriven = 0; + size_t count_mixed_driver = 0; inc_indent(); f << indent << "assert(path.empty() || path[path.size() - 1] == ' ');\n"; for (auto wire : module->wires()) { if (wire->name[0] != '\\') continue; + if (module->get_bool_attribute(ID(cxxrtl_blackbox)) && (wire->port_id == 0)) + continue; + count_public_wires++; if (debug_const_wires.count(wire)) { // Wire tied to a constant f << indent << "static const value<" << wire->width << "> const_" << mangle(wire) << " = "; dump_const(debug_const_wires[wire]); f << ";\n"; - f << indent << "items.emplace(path + " << 
escape_cxx_string(get_hdl_name(wire)); - f << ", debug_item(const_" << mangle(wire) << "));\n"; + f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire)); + f << ", debug_item(const_" << mangle(wire) << ", "; + f << wire->start_offset << "));\n"; count_const_wires++; } else if (debug_alias_wires.count(wire)) { // Alias of a member wire - f << indent << "items.emplace(path + " << escape_cxx_string(get_hdl_name(wire)); - f << ", debug_item(" << mangle(debug_alias_wires[wire]) << "));\n"; + f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(wire)); + f << ", debug_item(debug_alias(), " << mangle(debug_alias_wires[wire]) << ", "; + f << wire->start_offset << "));\n"; count_alias_wires++; } else if (!localized_wires.count(wire)) { // Member wire - f << indent << "items.emplace(path + " << escape_cxx_string(get_hdl_name(wire)); - f << ", debug_item(" << mangle(wire) << "));\n"; + std::vector<std::string> flags; + + if (wire->port_input && wire->port_output) + flags.push_back("INOUT"); + else if (wire->port_input) + flags.push_back("INPUT"); + else if (wire->port_output) + flags.push_back("OUTPUT"); + + bool has_driven_sync = false; + bool has_driven_comb = false; + bool has_undriven = false; + SigSpec sig(wire); + for (auto bit : sig.bits()) + if (!bit_has_state.count(bit)) + has_undriven = true; + else if (bit_has_state[bit]) + has_driven_sync = true; + else + has_driven_comb = true; + if (has_driven_sync) + flags.push_back("DRIVEN_SYNC"); + if (has_driven_sync && !has_driven_comb && !has_undriven) + count_driven_sync++; + if (has_driven_comb) + flags.push_back("DRIVEN_COMB"); + if (!has_driven_sync && has_driven_comb && !has_undriven) + count_driven_comb++; + if (has_undriven) + flags.push_back("UNDRIVEN"); + if (!has_driven_sync && !has_driven_comb && has_undriven) + count_undriven++; + if (has_driven_sync + has_driven_comb + has_undriven > 1) + count_mixed_driver++; + + f << indent << "items.add(path + " << 
escape_cxx_string(get_hdl_name(wire)); + f << ", debug_item(" << mangle(wire) << ", "; + f << wire->start_offset; + bool first = true; + for (auto flag : flags) { + if (first) { + first = false; + f << ", "; + } else { + f << "|"; + } + f << "debug_item::" << flag; + } + f << "));\n"; count_member_wires++; } else { count_skipped_wires++; } } - for (auto &memory_it : module->memories) { - if (memory_it.first[0] != '\\') - continue; - f << indent << "items.emplace(path + " << escape_cxx_string(get_hdl_name(memory_it.second)); - f << ", debug_item(" << mangle(memory_it.second) << "));\n"; - } - for (auto cell : module->cells()) { - if (is_internal_cell(cell->type)) - continue; - const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : "."; - f << indent << mangle(cell) << access << "debug_info(items, "; - f << "path + " << escape_cxx_string(get_hdl_name(cell) + ' ') << ");\n"; + if (!module->get_bool_attribute(ID(cxxrtl_blackbox))) { + for (auto &memory_it : module->memories) { + if (memory_it.first[0] != '\\') + continue; + f << indent << "items.add(path + " << escape_cxx_string(get_hdl_name(memory_it.second)); + f << ", debug_item(" << mangle(memory_it.second) << ", "; + f << memory_it.second->start_offset << "));\n"; + } + for (auto cell : module->cells()) { + if (is_internal_cell(cell->type)) + continue; + const char *access = is_cxxrtl_blackbox_cell(cell) ? 
"->" : "."; + f << indent << mangle(cell) << access << "debug_info(items, "; + f << "path + " << escape_cxx_string(get_hdl_name(cell) + ' ') << ");\n"; + } } dec_indent(); - log_debug("Debug information statistics for module %s:\n", log_id(module)); - log_debug(" Const wires: %zu\n", count_const_wires); - log_debug(" Alias wires: %zu\n", count_alias_wires); - log_debug(" Member wires: %zu\n", count_member_wires); - log_debug(" Other wires: %zu (no debug information)\n", count_skipped_wires); + log_debug("Debug information statistics for module `%s':\n", log_id(module)); + log_debug(" Public wires: %zu, of which:\n", count_public_wires); + log_debug(" Const wires: %zu\n", count_const_wires); + log_debug(" Alias wires: %zu\n", count_alias_wires); + log_debug(" Member wires: %zu, of which:\n", count_member_wires); + log_debug(" Driven sync: %zu\n", count_driven_sync); + log_debug(" Driven comb: %zu\n", count_driven_comb); + log_debug(" Undriven: %zu\n", count_undriven); + log_debug(" Mixed driver: %zu\n", count_mixed_driver); + log_debug(" Other wires: %zu (no debug information)\n", count_skipped_wires); } void dump_metadata_map(const dict<RTLIL::IdString, RTLIL::Const> &metadata_map) @@ -1840,7 +1923,8 @@ struct CxxrtlWorker { topo_design.edge(cell_module, module); } } - log_assert(topo_design.sort()); + bool no_loops = topo_design.sort(); + log_assert(no_loops); modules.insert(modules.end(), topo_design.sorted.begin(), topo_design.sorted.end()); if (split_intf) { @@ -1912,10 +1996,12 @@ struct CxxrtlWorker { f << "} // namespace " << design_ns << "\n"; f << "\n"; if (top_module != nullptr && debug_info) { + f << "extern \"C\"\n"; f << "cxxrtl_toplevel " << design_ns << "_create() {\n"; inc_indent(); + std::string top_type = design_ns + "::" + mangle(top_module); f << indent << "return new _cxxrtl_toplevel { "; - f << "std::make_unique<" << design_ns << "::" << mangle(top_module) << ">()"; + f << "std::unique_ptr<" << top_type << ">(new " + top_type + ")"; f << " 
};\n"; dec_indent(); f << "}\n"; @@ -1949,7 +2035,7 @@ struct CxxrtlWorker { void analyze_design(RTLIL::Design *design) { bool has_feedback_arcs = false; - bool has_buffered_wires = false; + bool has_buffered_comb_wires = false; for (auto module : design->modules()) { if (!design->selected_module(module)) @@ -1961,6 +2047,8 @@ struct CxxrtlWorker { if (module->get_bool_attribute(ID(cxxrtl_blackbox))) { for (auto port : module->ports) { RTLIL::Wire *wire = module->wire(port); + if (wire->port_input && !wire->port_output) + unbuffered_wires.insert(wire); if (wire->has_attribute(ID(cxxrtl_edge))) { RTLIL::Const edge_attr = wire->attributes[ID(cxxrtl_edge)]; if (!(edge_attr.flags & RTLIL::CONST_FLAG_STRING) || (int)edge_attr.decode_string().size() != GetSize(wire)) @@ -2016,7 +2104,7 @@ struct CxxrtlWorker { FlowGraph::Node *node = flow.add_node(cell); // Various DFF cells are treated like posedge/negedge processes, see above for details. - if (cell->type.in(ID($dff), ID($dffe), ID($adff), ID($dffsr))) { + if (cell->type.in(ID($dff), ID($dffe), ID($adff), ID($adffe), ID($dffsr), ID($dffsre), ID($sdff), ID($sdffe), ID($sdffce))) { if (cell->getPort(ID::CLK).is_wire()) register_edge_signal(sigmap, cell->getPort(ID::CLK), cell->parameters[ID::CLK_POLARITY].as_bool() ? 
RTLIL::STp : RTLIL::STn); @@ -2096,6 +2184,8 @@ struct CxxrtlWorker { if (wire->name.begins_with("$") && !elide_internal) continue; if (wire->name.begins_with("\\") && !elide_public) continue; if (edge_wires[wire]) continue; + if (flow.wire_comb_defs[wire].size() > 1) + log_cmd_error("Wire %s.%s has multiple drivers.\n", log_id(module), log_id(wire)); log_assert(flow.wire_comb_defs[wire].size() == 1); elided_wires[wire] = **flow.wire_comb_defs[wire].begin(); } @@ -2145,17 +2235,20 @@ struct CxxrtlWorker { log("Module `%s' contains feedback arcs through wires:\n", log_id(module)); for (auto wire : feedback_wires) log(" %s\n", log_id(wire)); - log("\n"); } for (auto wire : module->wires()) { if (feedback_wires[wire]) continue; - if (wire->port_id != 0) continue; + if (wire->port_output && !module->get_bool_attribute(ID::top)) continue; + if (wire->name.begins_with("$") && !unbuffer_internal) continue; + if (wire->name.begins_with("\\") && !unbuffer_public) continue; + if (flow.wire_sync_defs.count(wire) > 0) continue; + unbuffered_wires.insert(wire); + if (edge_wires[wire]) continue; if (wire->get_bool_attribute(ID::keep)) continue; + if (wire->port_input || wire->port_output) continue; if (wire->name.begins_with("$") && !localize_internal) continue; if (wire->name.begins_with("\\") && !localize_public) continue; - if (edge_wires[wire]) continue; - if (flow.wire_sync_defs.count(wire) > 0) continue; localized_wires.insert(wire); } @@ -2165,22 +2258,22 @@ struct CxxrtlWorker { // it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases // as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs // also require more than one delta cycle to converge. 
- pool<const RTLIL::Wire*> buffered_wires; + pool<const RTLIL::Wire*> buffered_comb_wires; for (auto wire : module->wires()) { - if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) { - if (!feedback_wires[wire]) - buffered_wires.insert(wire); - } + if (flow.wire_comb_defs[wire].size() > 0 && !unbuffered_wires[wire] && !feedback_wires[wire]) + buffered_comb_wires.insert(wire); } - if (!buffered_wires.empty()) { - has_buffered_wires = true; + if (!buffered_comb_wires.empty()) { + has_buffered_comb_wires = true; log("Module `%s' contains buffered combinatorial wires:\n", log_id(module)); - for (auto wire : buffered_wires) + for (auto wire : buffered_comb_wires) log(" %s\n", log_id(wire)); - log("\n"); } - eval_converges[module] = feedback_wires.empty() && buffered_wires.empty(); + eval_converges[module] = feedback_wires.empty() && buffered_comb_wires.empty(); + + for (auto item : flow.bit_has_state) + bit_has_state.insert(item); if (debug_info) { // Find wires that alias other wires or are tied to a constant; debug information can be enriched with these @@ -2191,7 +2284,7 @@ struct CxxrtlWorker { for (auto wire : module->wires()) { if (wire->name[0] != '\\') continue; - if (!localized_wires[wire]) + if (!unbuffered_wires[wire]) continue; const RTLIL::Wire *wire_it = wire; while (1) { @@ -2204,7 +2297,7 @@ struct CxxrtlWorker { RTLIL::SigSpec rhs_sig = node->connect.second; if (rhs_sig.is_wire()) { RTLIL::Wire *rhs_wire = rhs_sig.as_wire(); - if (localized_wires[rhs_wire]) { + if (unbuffered_wires[rhs_wire]) { wire_it = rhs_wire; // maybe an alias } else { debug_alias_wires[wire] = rhs_wire; // is an alias @@ -2220,24 +2313,26 @@ struct CxxrtlWorker { } } } - if (has_feedback_arcs || has_buffered_wires) { + if (has_feedback_arcs || has_buffered_comb_wires) { // Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated // by optimizing the design, if after `proc; flatten` there are 
any feedback wires remaining, it is very // likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message. const char *why_pessimistic = nullptr; if (has_feedback_arcs) why_pessimistic = "feedback wires"; - else if (has_buffered_wires) + else if (has_buffered_comb_wires) why_pessimistic = "buffered combinatorial wires"; log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic); - if (!max_opt_level) - log("Increasing the optimization level may eliminate %s from the design.\n", why_pessimistic); + if (!run_flatten) + log("Flattening may eliminate %s from the design.\n", why_pessimistic); + if (!run_proc) + log("Converting processes to netlists may eliminate %s from the design.\n", why_pessimistic); } } - void check_design(RTLIL::Design *design, bool &has_sync_init, bool &has_packed_mem) + void check_design(RTLIL::Design *design, bool &has_top, bool &has_sync_init, bool &has_packed_mem) { - has_sync_init = has_packed_mem = false; + has_sync_init = has_packed_mem = has_top = false; for (auto module : design->modules()) { if (module->get_blackbox_attribute() && !module->has_attribute(ID(cxxrtl_blackbox))) @@ -2249,13 +2344,17 @@ struct CxxrtlWorker { if (!design->selected_module(module)) continue; + if (module->get_bool_attribute(ID::top)) + has_top = true; + for (auto proc : module->processes) for (auto sync : proc.second->syncs) if (sync->type == RTLIL::STi) has_sync_init = true; - for (auto cell : module->cells()) - if (cell->type == ID($mem)) + // The Mem constructor also checks for well-formedness of $meminit cells, if any. 
+ for (auto &mem : Mem::get_all_memories(module)) + if (mem.packed) has_packed_mem = true; } } @@ -2263,13 +2362,20 @@ struct CxxrtlWorker { void prepare_design(RTLIL::Design *design) { bool did_anything = false; - bool has_sync_init, has_packed_mem; + bool has_top, has_sync_init, has_packed_mem; log_push(); - check_design(design, has_sync_init, has_packed_mem); - if (run_proc_flatten) { - Pass::call(design, "proc"); + check_design(design, has_top, has_sync_init, has_packed_mem); + if (run_hierarchy && !has_top) { + Pass::call(design, "hierarchy -auto-top"); + did_anything = true; + } + if (run_flatten) { Pass::call(design, "flatten"); did_anything = true; + } + if (run_proc) { + Pass::call(design, "proc"); + did_anything = true; } else if (has_sync_init) { // We're only interested in proc_init, but it depends on proc_prune and proc_clean, so call those // in case they weren't already. (This allows `yosys foo.v -o foo.cc` to work.) @@ -2283,9 +2389,9 @@ struct CxxrtlWorker { did_anything = true; } // Recheck the design if it was modified. 
- if (has_sync_init || has_packed_mem) - check_design(design, has_sync_init, has_packed_mem); - log_assert(!(has_sync_init || has_packed_mem)); + if (did_anything) + check_design(design, has_top, has_sync_init, has_packed_mem); + log_assert(has_top && !has_sync_init && !has_packed_mem); log_pop(); if (did_anything) log_spacer(); @@ -2294,11 +2400,12 @@ struct CxxrtlWorker { }; struct CxxrtlBackend : public Backend { - static const int DEFAULT_OPT_LEVEL = 5; + static const int DEFAULT_OPT_LEVEL = 6; + static const int OPT_LEVEL_DEBUG = 4; static const int DEFAULT_DEBUG_LEVEL = 1; CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { } - void help() YS_OVERRIDE + void help() override { // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| log("\n"); @@ -2317,9 +2424,9 @@ struct CxxrtlBackend : public Backend { log(" top.step();\n"); log(" while (1) {\n"); log(" /* user logic */\n"); - log(" top.p_clk = value<1> {0u};\n"); + log(" top.p_clk.set(false);\n"); log(" top.step();\n"); - log(" top.p_clk = value<1> {1u};\n"); + log(" top.p_clk.set(true);\n"); log(" top.step();\n"); log(" }\n"); log(" }\n"); @@ -2466,6 +2573,22 @@ struct CxxrtlBackend : public Backend { log(" place the generated code into namespace <ns-name>. if not specified,\n"); log(" \"cxxrtl_design\" is used.\n"); log("\n"); + log(" -nohierarchy\n"); + log(" use design hierarchy as-is. in most designs, a top module should be\n"); + log(" present as it is exposed through the C API and has unbuffered outputs\n"); + log(" for improved performance; it will be determined automatically if absent.\n"); + log("\n"); + log(" -noflatten\n"); + log(" don't flatten the design. 
fully flattened designs can evaluate within\n"); + log(" one delta cycle if they have no combinatorial feedback.\n"); + log(" note that the debug interface and waveform dumps use full hierarchical\n"); + log(" names for all wires even in flattened designs.\n"); + log("\n"); + log(" -noproc\n"); + log(" don't convert processes to netlists. in most designs, converting\n"); + log(" processes significantly improves evaluation performance at the cost of\n"); + log(" slight increase in compilation time.\n"); + log("\n"); log(" -O <level>\n"); log(" set the optimization level. the default is -O%d. higher optimization\n", DEFAULT_OPT_LEVEL); log(" levels dramatically decrease compile and run time, and highest level\n"); @@ -2475,19 +2598,26 @@ struct CxxrtlBackend : public Backend { log(" no optimization.\n"); log("\n"); log(" -O1\n"); - log(" elide internal wires if possible.\n"); + log(" localize internal wires if possible.\n"); log("\n"); log(" -O2\n"); - log(" like -O1, and localize internal wires if possible.\n"); + log(" like -O1, and unbuffer internal wires if possible.\n"); log("\n"); log(" -O3\n"); - log(" like -O2, and elide public wires not marked (*keep*) if possible.\n"); + log(" like -O2, and elide internal wires if possible.\n"); log("\n"); log(" -O4\n"); - log(" like -O3, and localize public wires not marked (*keep*) if possible.\n"); + log(" like -O3, and unbuffer public wires not marked (*keep*) if possible.\n"); log("\n"); log(" -O5\n"); - log(" like -O4, and run `proc; flatten` first.\n"); + log(" like -O4, and localize public wires not marked (*keep*) if possible.\n"); + log("\n"); + log(" -O6\n"); + log(" like -O5, and elide public wires not marked (*keep*) if possible.\n"); + log("\n"); + log(" -Og\n"); + log(" highest optimization level that provides debug information for all\n"); + log(" public wires. currently, alias for -O%d.\n", OPT_LEVEL_DEBUG); log("\n"); log(" -g <level>\n"); log(" set the debug level. the default is -g%d. 
higher debug levels provide\n", DEFAULT_DEBUG_LEVEL); @@ -2502,8 +2632,11 @@ struct CxxrtlBackend : public Backend { log("\n"); } - void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) override { + bool nohierarchy = false; + bool noflatten = false; + bool noproc = false; int opt_level = DEFAULT_OPT_LEVEL; int debug_level = DEFAULT_DEBUG_LEVEL; CxxrtlWorker worker; @@ -2513,6 +2646,27 @@ struct CxxrtlBackend : public Backend { size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-nohierarchy") { + nohierarchy = true; + continue; + } + if (args[argidx] == "-noflatten") { + noflatten = true; + continue; + } + if (args[argidx] == "-noproc") { + noproc = true; + continue; + } + if (args[argidx] == "-Og") { + opt_level = OPT_LEVEL_DEBUG; + continue; + } + if (args[argidx] == "-O" && argidx+1 < args.size() && args[argidx+1] == "g") { + argidx++; + opt_level = OPT_LEVEL_DEBUG; + continue; + } if (args[argidx] == "-O" && argidx+1 < args.size()) { opt_level = std::stoi(args[++argidx]); continue; @@ -2541,30 +2695,34 @@ struct CxxrtlBackend : public Backend { } extra_args(f, filename, args, argidx); + worker.run_hierarchy = !nohierarchy; + worker.run_flatten = !noflatten; + worker.run_proc = !noproc; switch (opt_level) { // the highest level here must match DEFAULT_OPT_LEVEL + case 6: + worker.elide_public = true; + YS_FALLTHROUGH case 5: - worker.max_opt_level = true; - worker.run_proc_flatten = true; + worker.localize_public = true; YS_FALLTHROUGH case 4: - worker.localize_public = true; + worker.unbuffer_public = true; YS_FALLTHROUGH case 3: - worker.elide_public = true; + worker.elide_internal = true; YS_FALLTHROUGH case 2: worker.localize_internal = true; YS_FALLTHROUGH case 1: - worker.elide_internal = true; + worker.unbuffer_internal = true; YS_FALLTHROUGH case 
0: break; default: log_cmd_error("Invalid optimization level %d.\n", opt_level); } - switch (debug_level) { // the highest level here must match DEFAULT_DEBUG_LEVEL case 1: diff --git a/backends/cxxrtl/cxxrtl_capi.cc b/backends/cxxrtl/cxxrtl_capi.cc index 489d72da5..b77e4c491 100644 --- a/backends/cxxrtl/cxxrtl_capi.cc +++ b/backends/cxxrtl/cxxrtl_capi.cc @@ -43,18 +43,29 @@ void cxxrtl_destroy(cxxrtl_handle handle) { delete handle; } +int cxxrtl_eval(cxxrtl_handle handle) { + return handle->module->eval(); +} + +int cxxrtl_commit(cxxrtl_handle handle) { + return handle->module->commit(); +} + size_t cxxrtl_step(cxxrtl_handle handle) { return handle->module->step(); } -cxxrtl_object *cxxrtl_get(cxxrtl_handle handle, const char *name) { - if (handle->objects.count(name) > 0) - return static_cast<cxxrtl_object*>(&handle->objects.at(name)); - return nullptr; +struct cxxrtl_object *cxxrtl_get_parts(cxxrtl_handle handle, const char *name, size_t *parts) { + auto it = handle->objects.table.find(name); + if (it == handle->objects.table.end()) + return nullptr; + *parts = it->second.size(); + return static_cast<cxxrtl_object*>(&it->second[0]); } void cxxrtl_enum(cxxrtl_handle handle, void *data, - void (*callback)(void *data, const char *name, cxxrtl_object *object)) { - for (auto &it : handle->objects) - callback(data, it.first.c_str(), static_cast<cxxrtl_object*>(&it.second)); + void (*callback)(void *data, const char *name, + cxxrtl_object *object, size_t parts)) { + for (auto &it : handle->objects.table) + callback(data, it.first.c_str(), static_cast<cxxrtl_object*>(&it.second[0]), it.second.size()); } diff --git a/backends/cxxrtl/cxxrtl_capi.h b/backends/cxxrtl/cxxrtl_capi.h index 46aa662b2..385d6dcf3 100644 --- a/backends/cxxrtl/cxxrtl_capi.h +++ b/backends/cxxrtl/cxxrtl_capi.h @@ -26,6 +26,7 @@ #include <stddef.h> #include <stdint.h> +#include <assert.h> #ifdef __cplusplus extern "C" { @@ -54,12 +55,28 @@ cxxrtl_handle cxxrtl_create(cxxrtl_toplevel design); // 
Release all resources used by a design and its handle. void cxxrtl_destroy(cxxrtl_handle handle); +// Evaluate the design, propagating changes on inputs to the `next` value of internal state and +// output wires. +// +// Returns 1 if the design is known to immediately converge, 0 otherwise. +int cxxrtl_eval(cxxrtl_handle handle); + +// Commit the design, replacing the `curr` value of internal state and output wires with the `next` +// value. +// +// Return 1 if any of the `curr` values were updated, 0 otherwise. +int cxxrtl_commit(cxxrtl_handle handle); + // Simulate the design to a fixed point. // // Returns the number of delta cycles. size_t cxxrtl_step(cxxrtl_handle handle); // Type of a simulated object. +// +// The type of a simulated object indicates the way it is stored and the operations that are legal +// to perform on it (i.e. won't crash the simulation). It says very little about object semantics, +// which is specified through flags. enum cxxrtl_type { // Values correspond to singly buffered netlist nodes, i.e. nodes driven exclusively by // combinatorial cells, or toplevel input nodes. @@ -73,7 +90,8 @@ enum cxxrtl_type { CXXRTL_VALUE = 0, // Wires correspond to doubly buffered netlist nodes, i.e. nodes driven, at least in part, by - // storage cells, or by combinatorial cells that are a part of a feedback path. + // storage cells, or by combinatorial cells that are a part of a feedback path. They are also + // present in non-optimized builds. // // Wires can be inspected via the `curr` pointer and modified via the `next` pointer (which are // distinct for wires). Note that changes to the bits driven by combinatorial cells will be @@ -89,7 +107,74 @@ enum cxxrtl_type { // always NULL. CXXRTL_MEMORY = 2, - // More object types will be added in the future, but the existing ones will never change. + // Aliases correspond to netlist nodes driven by another node such that their value is always + // exactly equal. 
+ // + // Aliases can be inspected via the `curr` pointer. They cannot be modified, and the `next` + // pointer is always NULL. + CXXRTL_ALIAS = 3, + + // More object types may be added in the future, but the existing ones will never change. +}; + +// Flags of a simulated object. +// +// The flags of a simulated object indicate its role in the netlist: +// * The flags `CXXRTL_INPUT` and `CXXRTL_OUTPUT` designate module ports. +// * The flags `CXXRTL_DRIVEN_SYNC`, `CXXRTL_DRIVEN_COMB`, and `CXXRTL_UNDRIVEN` specify +// the semantics of node state. An object with several of these flags set has different bits +// follow different semantics. +enum cxxrtl_flag { + // Node is a module input port. + // + // This flag can be set on objects of type `CXXRTL_VALUE` and `CXXRTL_WIRE`. It may be combined + // with `CXXRTL_OUTPUT`, as well as other flags. + CXXRTL_INPUT = 1 << 0, + + // Node is a module output port. + // + // This flag can be set on objects of type `CXXRTL_WIRE`. It may be combined with `CXXRTL_INPUT`, + // as well as other flags. + CXXRTL_OUTPUT = 1 << 1, + + // Node is a module inout port. + // + // This flag can be set on objects of type `CXXRTL_WIRE`. It may be combined with other flags. + CXXRTL_INOUT = (CXXRTL_INPUT|CXXRTL_OUTPUT), + + // Node has bits that are driven by a storage cell. + // + // This flag can be set on objects of type `CXXRTL_WIRE`. It may be combined with + // `CXXRTL_DRIVEN_COMB` and `CXXRTL_UNDRIVEN`, as well as other flags. + // + // This flag is set on wires that have bits connected directly to the output of a flip-flop or + // a latch, and hold its state. Many `CXXRTL_WIRE` objects may not have the `CXXRTL_DRIVEN_SYNC` + // flag set; for example, output ports and feedback wires generally won't. 
Writing to the `next` + // pointer of these wires updates stored state, and for designs without combinatorial loops, + // capturing the value from every of these wires through the `curr` pointer creates a complete + // snapshot of the design state. + CXXRTL_DRIVEN_SYNC = 1 << 2, + + // Node has bits that are driven by a combinatorial cell or another node. + // + // This flag can be set on objects of type `CXXRTL_VALUE` and `CXXRTL_WIRE`. It may be combined + // with `CXXRTL_DRIVEN_SYNC` and `CXXRTL_UNDRIVEN`, as well as other flags. + // + // This flag is set on objects that have bits connected to the output of a combinatorial cell, + // or directly to another node. For designs without combinatorial loops, writing to such bits + // through the `next` pointer (if it is not NULL) has no effect. + CXXRTL_DRIVEN_COMB = 1 << 3, + + // Node has bits that are not driven. + // + // This flag can be set on objects of type `CXXRTL_VALUE` and `CXXRTL_WIRE`. It may be combined + // with `CXXRTL_DRIVEN_SYNC` and `CXXRTL_DRIVEN_COMB`, as well as other flags. + // + // This flag is set on objects that have bits not driven by an output of any cell or by another + // node, such as inputs and dangling wires. + CXXRTL_UNDRIVEN = 1 << 4, + + // More object flags may be added in the future, but the existing ones will never change. }; // Description of a simulated object. @@ -103,12 +188,21 @@ struct cxxrtl_object { // determines all other properties of the object. uint32_t type; // actually `enum cxxrtl_type` + // Flags of the object. + uint32_t flags; // actually bit mask of `enum cxxrtl_flags` + // Width of the object in bits. size_t width; + // Index of the least significant bit. + size_t lsb_at; + // Depth of the object. Only meaningful for memories; for other objects, always 1. size_t depth; + // Index of the first word. Only meaningful for memories; for other objects, always 0; + size_t zero_at; + // Bits stored in the object, as 32-bit chunks, least significant bits first. 
// // The width is rounded up to a multiple of 32; the padding bits are always set to 0 by @@ -123,7 +217,7 @@ struct cxxrtl_object { uint32_t *curr; uint32_t *next; - // More description fields will be added in the future, but the existing ones will never change. + // More description fields may be added in the future, but the existing ones will never change. }; // Retrieve description of a simulated object. @@ -133,17 +227,36 @@ struct cxxrtl_object { // the top-level module instantiates a module `foo`, which in turn contains a wire `bar`, the full // hierarchical name is `\foo \bar`. // -// Returns the object if it was found, NULL otherwise. The returned value is valid until the design -// is destroyed. -struct cxxrtl_object *cxxrtl_get(cxxrtl_handle handle, const char *name); +// The storage of a single abstract object may be split (usually with the `splitnets` pass) into +// many physical parts, all of which correspond to the same hierarchical name. To handle such cases, +// this function returns an array and writes its length to `parts`. The array is sorted by `lsb_at`. +// +// Returns the object parts if it was found, NULL otherwise. The returned parts are valid until +// the design is destroyed. +struct cxxrtl_object *cxxrtl_get_parts(cxxrtl_handle handle, const char *name, size_t *parts); + +// Retrieve description of a single part simulated object. +// +// This function is a shortcut for the most common use of `cxxrtl_get_parts`. It asserts that, +// if the object exists, it consists of a single part. If assertions are disabled, it returns NULL +// for multi-part objects. +inline struct cxxrtl_object *cxxrtl_get(cxxrtl_handle handle, const char *name) { + size_t parts = 0; + struct cxxrtl_object *object = cxxrtl_get_parts(handle, name, &parts); + assert(object == NULL || parts == 1); + if (object == NULL || parts == 1) + return object; + return NULL; +} // Enumerate simulated objects. 
// // For every object in the simulation, `callback` is called with the provided `data`, the full -// hierarchical name of the object (see `cxxrtl_get` for details), and the object description. +// hierarchical name of the object (see `cxxrtl_get` for details), and the object parts. // The provided `name` and `object` values are valid until the design is destroyed. void cxxrtl_enum(cxxrtl_handle handle, void *data, - void (*callback)(void *data, const char *name, struct cxxrtl_object *object)); + void (*callback)(void *data, const char *name, + struct cxxrtl_object *object, size_t parts)); #ifdef __cplusplus } diff --git a/backends/cxxrtl/cxxrtl_vcd.h b/backends/cxxrtl/cxxrtl_vcd.h index f6b78bbf7..dbeabbaf2 100644 --- a/backends/cxxrtl/cxxrtl_vcd.h +++ b/backends/cxxrtl/cxxrtl_vcd.h @@ -66,11 +66,19 @@ class vcd_writer { } while (ident != 0); } - void emit_var(const variable &var, const std::string &type, const std::string &name) { + void emit_var(const variable &var, const std::string &type, const std::string &name, + size_t lsb_at, bool multipart) { assert(!streaming); buffer += "$var " + type + " " + std::to_string(var.width) + " "; emit_ident(var.ident); - buffer += " " + name + " $end\n"; + buffer += " " + name; + if (multipart || name.back() == ']' || lsb_at != 0) { + if (var.width == 1) + buffer += " [" + std::to_string(lsb_at) + "]"; + else + buffer += " [" + std::to_string(lsb_at + var.width - 1) + ":" + std::to_string(lsb_at) + "]"; + } + buffer += " $end\n"; } void emit_enddefinitions() { @@ -104,13 +112,13 @@ class vcd_writer { buffer += '\n'; } - const variable &register_variable(size_t width, chunk_t *curr, bool immutable = false) { + const variable &register_variable(size_t width, chunk_t *curr, bool constant = false) { if (aliases.count(curr)) { return variables[aliases[curr]]; } else { const size_t chunks = (width + (sizeof(chunk_t) * 8 - 1)) / (sizeof(chunk_t) * 8); aliases[curr] = variables.size(); - if (immutable) { + if (constant) { 
variables.emplace_back(variable { variables.size(), width, curr, (size_t)-1 }); } else { variables.emplace_back(variable { variables.size(), width, curr, cache.size() }); @@ -122,7 +130,7 @@ class vcd_writer { bool test_variable(const variable &var) { if (var.prev_off == (size_t)-1) - return false; // immutable + return false; // constant const size_t chunks = (var.width + (sizeof(chunk_t) * 8 - 1)) / (sizeof(chunk_t) * 8); if (std::equal(&var.curr[0], &var.curr[chunks], &cache[var.prev_off])) { return false; @@ -155,7 +163,7 @@ public: emit_timescale(number, unit); } - void add(const std::string &hier_name, const debug_item &item) { + void add(const std::string &hier_name, const debug_item &item, bool multipart = false) { std::vector<std::string> scope = split_hierarchy(hier_name); std::string name = scope.back(); scope.pop_back(); @@ -164,20 +172,31 @@ public: switch (item.type) { // Not the best naming but oh well... case debug_item::VALUE: - emit_var(register_variable(item.width, item.curr, /*immutable=*/item.next == nullptr), "wire", name); + emit_var(register_variable(item.width, item.curr, /*constant=*/item.next == nullptr), + "wire", name, item.lsb_at, multipart); break; case debug_item::WIRE: - emit_var(register_variable(item.width, item.curr), "reg", name); + emit_var(register_variable(item.width, item.curr), + "reg", name, item.lsb_at, multipart); break; case debug_item::MEMORY: { const size_t stride = (item.width + (sizeof(chunk_t) * 8 - 1)) / (sizeof(chunk_t) * 8); for (size_t index = 0; index < item.depth; index++) { chunk_t *nth_curr = &item.curr[stride * index]; std::string nth_name = name + '[' + std::to_string(index) + ']'; - emit_var(register_variable(item.width, nth_curr), "reg", nth_name); + emit_var(register_variable(item.width, nth_curr), + "reg", nth_name, item.lsb_at, multipart); } break; } + case debug_item::ALIAS: + // Like VALUE, but, even though `item.next == nullptr` always holds, the underlying value + // can actually change, and must 
be tracked. In most cases the VCD identifier will be + // unified with the aliased reg, but we should handle the case where only the alias is + // added to the VCD writer, too. + emit_var(register_variable(item.width, item.curr), + "wire", name, item.lsb_at, multipart); + break; } } @@ -185,9 +204,10 @@ public: void add(const debug_items &items, const Filter &filter) { // `debug_items` is a map, so the items are already sorted in an order optimal for emitting // VCD scope sections. - for (auto &it : items) - if (filter(it.first, it.second)) - add(it.first, it.second); + for (auto &it : items.table) + for (auto &part : it.second) + if (filter(it.first, part)) + add(it.first, part, it.second.size() > 1); } void add(const debug_items &items) { @@ -198,7 +218,7 @@ public: void add_without_memories(const debug_items &items) { this->template add(items, [](const std::string &, const debug_item &item) { - return item.type == debug_item::VALUE || item.type == debug_item::WIRE; + return item.type != debug_item::MEMORY; }); } diff --git a/backends/cxxrtl/cxxrtl_vcd_capi.h b/backends/cxxrtl/cxxrtl_vcd_capi.h index 6a7fb9f47..d55afe223 100644 --- a/backends/cxxrtl/cxxrtl_vcd_capi.h +++ b/backends/cxxrtl/cxxrtl_vcd_capi.h @@ -75,8 +75,8 @@ void cxxrtl_vcd_add_from(cxxrtl_vcd vcd, cxxrtl_handle handle); // // Objects can only be scheduled before the first call to `cxxrtl_vcd_sample`. void cxxrtl_vcd_add_from_if(cxxrtl_vcd vcd, cxxrtl_handle handle, void *data, - int (*filter)(void *data, const char *name, - const struct cxxrtl_object *object)); + int (*filter)(void *data, const char *name, + const struct cxxrtl_object *object)); // Schedule all CXXRTL objects in a simulation except for memories. // |