Diffstat (limited to 'backends/cxxrtl/cxxrtl.h')
-rw-r--r-- | backends/cxxrtl/cxxrtl.h | 243 |
1 file changed, 191 insertions, 52 deletions
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index f0d7b9fc7..b4ffa87cd 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -36,22 +36,48 @@
 #include <map>
 #include <algorithm>
 #include <memory>
+#include <functional>
 #include <sstream>
 
 #include <backends/cxxrtl/cxxrtl_capi.h>
 
+#ifndef __has_attribute
+#	define __has_attribute(x) 0
+#endif
+
 // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
 // It generates a lot of specialized template functions with relatively large bodies that, when inlined
 // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
 // Because of this, most of the CXXRTL runtime must be always inlined for best performance.
-#ifndef __has_attribute
-#	define __has_attribute(x) 0
-#endif
 #if __has_attribute(always_inline)
 #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
 #else
 #define CXXRTL_ALWAYS_INLINE inline
 #endif
 
+// Conversely, some functions in the generated code are extremely large yet very cold, with both of these
+// properties being extreme enough to confuse C++ compilers into spending pathological amounts of time
+// on a futile (the code becomes worse) attempt to optimize the least important parts of code.
+#if __has_attribute(optnone)
+#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
+#elif __has_attribute(optimize)
+#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
+#else
+#define CXXRTL_EXTREMELY_COLD
+#endif
+
+// CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
+// of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
+// most result in undefined simulation results).
+//
+// Though by default, CXXRTL_ASSERT() expands to assert(), it may be overridden e.g. when integrating
+// the simulation into another process that should survive violating RTL contracts.
+#ifndef CXXRTL_ASSERT
+#ifndef CXXRTL_NDEBUG
+#define CXXRTL_ASSERT(x) assert(x)
+#else
+#define CXXRTL_ASSERT(x)
+#endif
+#endif
 
 namespace cxxrtl {
 
@@ -96,9 +122,11 @@ struct value : public expr_base<value<Bits>> {
 	explicit constexpr value(Init ...init) : data{init...} {}
 
 	value(const value<Bits> &) = default;
-	value(value<Bits> &&) = default;
 	value<Bits> &operator=(const value<Bits> &) = default;
 
+	value(value<Bits> &&) = default;
+	value<Bits> &operator=(value<Bits> &&) = default;
+
 	// A (no-op) helper that forces the cast to value<>.
 	CXXRTL_ALWAYS_INLINE
 	const value<Bits> &val() const {
@@ -289,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
 		return sext_cast<NewBits>()(*this);
 	}
 
+	// Bit replication is far more efficient than the equivalent concatenation.
+	template<size_t Count>
+	CXXRTL_ALWAYS_INLINE
+	value<Bits * Count> repeat() const {
+		static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
+		return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
+	}
+
 	// Operations with run-time parameters (offsets, amounts, etc).
 	//
 	// These operations are used for computations.
@@ -421,6 +457,42 @@ struct value : public expr_base<value<Bits>> {
 		return shr<AmountBits, /*Signed=*/true>(amount);
 	}
 
+	template<size_t ResultBits, size_t SelBits>
+	value<ResultBits> bmux(const value<SelBits> &sel) const {
+		static_assert(ResultBits << SelBits == Bits, "invalid sizes used in bmux()");
+		size_t amount = sel.data[0] * ResultBits;
+		size_t shift_chunks = amount / chunk::bits;
+		size_t shift_bits = amount % chunk::bits;
+		value<ResultBits> result;
+		chunk::type carry = 0;
+		if (ResultBits % chunk::bits + shift_bits > chunk::bits)
+			carry = data[result.chunks + shift_chunks] << (chunk::bits - shift_bits);
+		for (size_t n = 0; n < result.chunks; n++) {
+			result.data[result.chunks - 1 - n] = carry | (data[result.chunks + shift_chunks - 1 - n] >> shift_bits);
+			carry = (shift_bits == 0) ? 0
+				: data[result.chunks + shift_chunks - 1 - n] << (chunk::bits - shift_bits);
+		}
+		return result;
+	}
+
+	template<size_t ResultBits, size_t SelBits>
+	value<ResultBits> demux(const value<SelBits> &sel) const {
+		static_assert(Bits << SelBits == ResultBits, "invalid sizes used in demux()");
+		size_t amount = sel.data[0] * Bits;
+		size_t shift_chunks = amount / chunk::bits;
+		size_t shift_bits = amount % chunk::bits;
+		value<ResultBits> result;
+		chunk::type carry = 0;
+		for (size_t n = 0; n < chunks; n++) {
+			result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
+			carry = (shift_bits == 0) ? 0
+				: data[n] >> (chunk::bits - shift_bits);
+		}
+		if (Bits % chunk::bits + shift_bits > chunk::bits)
+			result.data[shift_chunks + chunks] = carry;
+		return result;
+	}
+
 	size_t ctpop() const {
 		size_t count = 0;
 		for (size_t n = 0; n < chunks; n++) {
@@ -452,10 +524,11 @@ struct value : public expr_base<value<Bits>> {
 		bool carry = CarryIn;
 		for (size_t n = 0; n < result.chunks; n++) {
 			result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry;
+			if (result.chunks - 1 == n)
+				result.data[result.chunks - 1] &= result.msb_mask;
 			carry = (result.data[n] < data[n]) ||
 			        (result.data[n] == data[n] && carry);
 		}
-		result.data[result.chunks - 1] &= result.msb_mask;
 		return {result, carry};
 	}
@@ -642,14 +715,20 @@ struct wire {
 	value<Bits> next;
 
 	wire() = default;
-	constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
+	explicit constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
 	template<typename... Init>
 	explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {}
 
+	// Copying and copy-assigning values is natural. If, however, a value is replaced with a wire,
+	// e.g. because a module is built with a different optimization level, then existing code could
+	// unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure
+	// this doesn't happen, prohibit copying and copy-assigning wires.
 	wire(const wire<Bits> &) = delete;
-	wire(wire<Bits> &&) = default;
 	wire<Bits> &operator=(const wire<Bits> &) = delete;
 
+	wire(wire<Bits> &&) = default;
+	wire<Bits> &operator=(wire<Bits> &&) = default;
+
 	template<class IntegerT>
 	CXXRTL_ALWAYS_INLINE
 	IntegerT get() const {
@@ -679,47 +758,32 @@ std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
 
 template<size_t Width>
 struct memory {
-	std::vector<value<Width>> data;
-
-	size_t depth() const {
-		return data.size();
-	}
+	const size_t depth;
+	std::unique_ptr<value<Width>[]> data;
 
-	memory() = delete;
-	explicit memory(size_t depth) : data(depth) {}
+	explicit memory(size_t depth) : depth(depth), data(new value<Width>[depth]) {}
 
 	memory(const memory<Width> &) = delete;
 	memory<Width> &operator=(const memory<Width> &) = delete;
 
-	// The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it
-	// into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
-	// construct an initializer_list in a constexpr (or something) and so if you try to do that the whole thing is
-	// first copied on the stack (probably overflowing it) and then again into `data`.
-	template<size_t Size>
-	struct init {
-		size_t offset;
-		value<Width> data[Size];
-	};
-
-	template<size_t... InitSize>
-	explicit memory(size_t depth, const init<InitSize> &...init) : data(depth) {
-		data.resize(depth);
-		// This utterly reprehensible construct is the most reasonable way to apply a function to every element
-		// of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list.
-		auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
-		(void)_;
+	memory(memory<Width> &&) = default;
+	memory<Width> &operator=(memory<Width> &&other) {
+		assert(depth == other.depth);
+		data = std::move(other.data);
+		write_queue = std::move(other.write_queue);
+		return *this;
 	}
 
 	// An operator for direct memory reads. May be used at any time during the simulation.
 	const value<Width> &operator [](size_t index) const {
-		assert(index < data.size());
+		assert(index < depth);
 		return data[index];
 	}
 
 	// An operator for direct memory writes. May only be used before the simulation is started. If used
 	// after the simulation is started, the design may malfunction.
 	value<Width> &operator [](size_t index) {
-		assert(index < data.size());
+		assert(index < depth);
 		return data[index];
 	}
@@ -744,7 +808,7 @@ struct memory {
 	std::vector<write> write_queue;
 
 	void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
-		assert(index < data.size());
+		assert(index < depth);
 		// Queue up the write while keeping the queue sorted by priority.
 		write_queue.insert(
 			std::upper_bound(write_queue.begin(), write_queue.end(), priority,
@@ -814,35 +878,52 @@ struct metadata {
 
 typedef std::map<std::string, metadata> metadata_map;
 
-// Helper class to disambiguate values/wires and their aliases.
+// Tag class to disambiguate values/wires and their aliases.
 struct debug_alias {};
 
+// Tag declaration to disambiguate values and debug outlines.
+using debug_outline = ::_cxxrtl_outline;
+
 // This structure is intended for consumption via foreign function interfaces, like Python's ctypes.
 // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++.
 //
 // To avoid violating strict aliasing rules, this structure has to be a subclass of the one used
 // in the C API, or it would not be possible to cast between the pointers to these.
 struct debug_item : ::cxxrtl_object {
+	// Object types.
 	enum : uint32_t {
-		VALUE  = CXXRTL_VALUE,
-		WIRE   = CXXRTL_WIRE,
-		MEMORY = CXXRTL_MEMORY,
-		ALIAS  = CXXRTL_ALIAS,
+		VALUE   = CXXRTL_VALUE,
+		WIRE    = CXXRTL_WIRE,
+		MEMORY  = CXXRTL_MEMORY,
+		ALIAS   = CXXRTL_ALIAS,
+		OUTLINE = CXXRTL_OUTLINE,
+	};
+
+	// Object flags.
+	enum : uint32_t {
+		INPUT  = CXXRTL_INPUT,
+		OUTPUT = CXXRTL_OUTPUT,
+		INOUT  = CXXRTL_INOUT,
+		DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC,
+		DRIVEN_COMB = CXXRTL_DRIVEN_COMB,
+		UNDRIVEN    = CXXRTL_UNDRIVEN,
 	};
 
 	debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {}
 
 	template<size_t Bits>
-	debug_item(value<Bits> &item, size_t lsb_offset = 0) {
+	debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
 		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
 		              "value<Bits> is not compatible with C layout");
 		type = VALUE;
+		flags = flags_;
 		width = Bits;
 		lsb_at = lsb_offset;
 		depth = 1;
 		zero_at = 0;
 		curr = item.data;
 		next = item.data;
+		outline = nullptr;
 	}
 
 	template<size_t Bits>
@@ -850,26 +931,30 @@ struct debug_item : ::cxxrtl_object {
 		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
 		              "value<Bits> is not compatible with C layout");
 		type = VALUE;
+		flags = DRIVEN_COMB;
 		width = Bits;
 		lsb_at = lsb_offset;
 		depth = 1;
 		zero_at = 0;
 		curr = const_cast<chunk_t*>(item.data);
 		next = nullptr;
+		outline = nullptr;
 	}
 
 	template<size_t Bits>
-	debug_item(wire<Bits> &item, size_t lsb_offset = 0) {
+	debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
 		static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
 		              sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
 		              "wire<Bits> is not compatible with C layout");
 		type = WIRE;
+		flags = flags_;
 		width = Bits;
 		lsb_at = lsb_offset;
 		depth = 1;
 		zero_at = 0;
 		curr = item.curr.data;
 		next = item.next.data;
+		outline = nullptr;
 	}
 
 	template<size_t Width>
@@ -877,12 +962,14 @@ struct debug_item : ::cxxrtl_object {
 		static_assert(sizeof(item.data[0]) == value<Width>::chunks * sizeof(chunk_t),
 		              "memory<Width> is not compatible with C layout");
 		type = MEMORY;
+		flags = 0;
 		width = Width;
 		lsb_at = 0;
-		depth = item.data.size();
+		depth = item.depth;
 		zero_at = zero_offset;
-		curr = item.data.empty() ? nullptr : item.data[0].data;
+		curr = item.data ? item.data[0].data : nullptr;
 		next = nullptr;
+		outline = nullptr;
 	}
 
 	template<size_t Bits>
@@ -890,12 +977,14 @@ struct debug_item : ::cxxrtl_object {
 		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
 		              "value<Bits> is not compatible with C layout");
 		type = ALIAS;
+		flags = DRIVEN_COMB;
 		width = Bits;
 		lsb_at = lsb_offset;
 		depth = 1;
 		zero_at = 0;
 		curr = const_cast<chunk_t*>(item.data);
 		next = nullptr;
+		outline = nullptr;
 	}
 
 	template<size_t Bits>
@@ -904,12 +993,45 @@ struct debug_item : ::cxxrtl_object {
 		              sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
 		              "wire<Bits> is not compatible with C layout");
 		type = ALIAS;
+		flags = DRIVEN_COMB;
 		width = Bits;
 		lsb_at = lsb_offset;
 		depth = 1;
 		zero_at = 0;
 		curr = const_cast<chunk_t*>(item.curr.data);
 		next = nullptr;
+		outline = nullptr;
+	}
+
+	template<size_t Bits>
+	debug_item(debug_outline &group, const value<Bits> &item, size_t lsb_offset = 0) {
+		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
+		              "value<Bits> is not compatible with C layout");
+		type = OUTLINE;
+		flags = DRIVEN_COMB;
+		width = Bits;
+		lsb_at = lsb_offset;
+		depth = 1;
+		zero_at = 0;
+		curr = const_cast<chunk_t*>(item.data);
+		next = nullptr;
+		outline = &group;
+	}
+
+	template<size_t Bits, class IntegerT>
+	IntegerT get() const {
+		assert(width == Bits && depth == 1);
+		value<Bits> item;
+		std::copy(curr, curr + value<Bits>::chunks, item.data);
+		return item.template get<IntegerT>();
+	}
+
+	template<size_t Bits, class IntegerT>
+	void set(IntegerT other) const {
+		assert(width == Bits && depth == 1);
+		value<Bits> item;
+		item.template set<IntegerT>(other);
+		std::copy(item.data, item.data + value<Bits>::chunks, next);
 	}
 };
 static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout");
@@ -947,13 +1069,25 @@ struct debug_items {
 	}
 };
 
+// Tag class to disambiguate the default constructor used by the toplevel module that calls reset(),
+// and the constructor of interior modules that should not call it.
+struct interior {};
+
 struct module {
 	module() {}
 	virtual ~module() {}
 
+	// Modules with black boxes cannot be copied. Although not all designs include black boxes,
+	// delete the copy constructor and copy assignment operator to make sure that any downstream
+	// code that manipulates modules doesn't accidentally depend on their availability.
 	module(const module &) = delete;
 	module &operator=(const module &) = delete;
 
+	module(module &&) = default;
+	module &operator=(module &&) = default;
+
+	virtual void reset() = 0;
+
 	virtual bool eval() = 0;
 	virtual bool commit() = 0;
@@ -974,11 +1108,16 @@ struct module {
 
 } // namespace cxxrtl
 
-// Internal structure used to communicate with the implementation of the C interface.
+// Internal structures used to communicate with the implementation of the C interface.
+
 typedef struct _cxxrtl_toplevel {
 	std::unique_ptr<cxxrtl::module> module;
 } *cxxrtl_toplevel;
 
+typedef struct _cxxrtl_outline {
+	std::function<void()> eval;
+} *cxxrtl_outline;
+
 // Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic
 // and independent of Yosys implementation details.
 //
@@ -1112,49 +1251,49 @@ value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template zcast<BitsY>().template shl(b);
+	return a.template zcast<BitsY>().shl(b);
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template scast<BitsY>().template shl(b);
+	return a.template scast<BitsY>().shl(b);
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template zcast<BitsY>().template shl(b);
+	return a.template zcast<BitsY>().shl(b);
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template scast<BitsY>().template shl(b);
+	return a.template scast<BitsY>().shl(b);
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template shr(b).template zcast<BitsY>();
+	return a.shr(b).template zcast<BitsY>();
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template shr(b).template scast<BitsY>();
+	return a.shr(b).template scast<BitsY>();
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template shr(b).template zcast<BitsY>();
+	return a.shr(b).template zcast<BitsY>();
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
 CXXRTL_ALWAYS_INLINE
 value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
-	return a.template sshr(b).template scast<BitsY>();
+	return a.sshr(b).template scast<BitsY>();
 }
 
 template<size_t BitsY, size_t BitsA, size_t BitsB>
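The hunks above introduce several user-visible interfaces; the sketches that follow illustrate how they might be used. None of this code is part of the change itself, and any identifiers not shown in the diff are made up for illustration.

The new CXXRTL_ASSERT hook is only given its default expansion when the macro is still unset, so an embedder that wants to survive RTL contract violations can define it before including cxxrtl.h. A minimal sketch, assuming the reporting strategy is up to the host:

    // Sketch only: report RTL contract violations instead of aborting the host process.
    // CXXRTL_ASSERT must be defined before cxxrtl.h is first included, because the header
    // only provides the assert()-based default when the macro is not yet defined.
    #include <cstdio>
    #define CXXRTL_ASSERT(x) \
        do { \
            if (!(x)) \
                std::fprintf(stderr, "RTL contract violated: %s\n", #x); \
        } while (0)
    #include <backends/cxxrtl/cxxrtl.h>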
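The chunked implementations of the new bmux() and demux() operations are hard to read in isolation. In terms of a single machine word their semantics reduce to the reference model below (assuming the packed value fits in 64 bits and the slice width is less than 64; this is not the implementation above, only its intent):

    #include <cstdint>

    // bmux: select the sel-th result_bits-wide slice of a packed word (a "big mux").
    uint64_t bmux_ref(uint64_t packed, unsigned sel, unsigned result_bits) {
        return (packed >> (sel * result_bits)) & ((uint64_t(1) << result_bits) - 1);
    }

    // demux: place a bits-wide word into the sel-th slot of an otherwise zero result (a "big demux").
    uint64_t demux_ref(uint64_t word, unsigned sel, unsigned bits) {
        return word << (sel * bits);
    }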
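The memory<> rework replaces the std::vector backing store and the depth() accessor with a fixed-size array and a const depth member, and makes memories movable but not copyable. Direct indexing is unchanged; a sketch of pre-simulation initialization, assuming cxxrtl.h is already included:

    // Sketch only: fill a 256-entry, 8-bit wide memory before the simulation starts.
    cxxrtl::memory<8> rom(256);
    for (size_t addr = 0; addr < rom.depth; addr++)       // depth is now a data member, not a method
        rom[addr].set<uint8_t>(static_cast<uint8_t>(addr));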
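debug_item gains driver/port flags, an OUTLINE type, and typed get()/set() accessors that copy whole values through the curr and next pointers of the C-layout object. A sketch of driving a value through its debug item; the 8-bit value and the DRIVEN_SYNC flag here are illustrative:

    // Sketch only: expose an 8-bit value as a debug item and poke it from host code.
    cxxrtl::value<8> counter;
    cxxrtl::debug_item item(counter, /*lsb_offset=*/0, cxxrtl::debug_item::DRIVEN_SYNC);
    uint8_t now = item.get<8, uint8_t>();   // copies the bits referenced by curr
    item.set<8, uint8_t>(now + 1);          // stores new bits through next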
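Finally, module grows a pure virtual reset() alongside eval() and commit(), and becomes movable. A typical top-level driving sequence would now start with reset(); cxxrtl_design::p_top below is a stand-in for whatever class `write_cxxrtl` generates:

    // Sketch only: reset, then settle the design by alternating eval() and commit().
    cxxrtl_design::p_top top;
    top.reset();                  // apply initial values to registers and memories
    do {
        top.eval();               // recompute combinational logic
    } while (top.commit());      // latch state; repeat until nothing changes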