diff options
author | Claire Wolf <clifford@clifford.at> | 2020-04-22 16:50:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-22 16:50:45 +0200 |
commit | 95c74b319b36f8cb950196c3e1d10c945629c1f5 (patch) | |
tree | b45deaf4c88b165bc6be5492bf2944fba935c2c7 /backends/cxxrtl/cxxrtl.cc | |
parent | cd82afb740fc2f1d9bead89fd2683be989acad37 (diff) | |
parent | 93288b8eaea3e346275082352edeea5cfb4ac38a (diff) | |
download | yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.tar.gz yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.tar.bz2 yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.zip |
Merge pull request #1979 from whitequark/cxxrtl-go-faster
cxxrtl: Gas gas gas! I'm gonna step on the gas! Tonight I'll fly!
Diffstat (limited to 'backends/cxxrtl/cxxrtl.cc')
-rw-r--r-- | backends/cxxrtl/cxxrtl.cc | 573 |
1 files changed, 392 insertions, 181 deletions
diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc index ef8335e50..237700b29 100644 --- a/backends/cxxrtl/cxxrtl.cc +++ b/backends/cxxrtl/cxxrtl.cc @@ -171,6 +171,11 @@ struct Scheduler { } }; +bool is_input_wire(const RTLIL::Wire *wire) +{ + return wire->port_input && !wire->port_output; +} + bool is_unary_cell(RTLIL::IdString type) { return type.in( @@ -210,11 +215,54 @@ bool is_internal_cell(RTLIL::IdString type) return type[0] == '$' && !type.begins_with("$paramod\\"); } +bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell) +{ + RTLIL::Module *cell_module = cell->module->design->module(cell->type); + log_assert(cell_module != nullptr); + return cell_module->get_bool_attribute(ID(cxxrtl.blackbox)); +} + +enum class CxxrtlPortType { + UNKNOWN = 0, // or mixed comb/sync + COMB = 1, + SYNC = 2, +}; + +CxxrtlPortType cxxrtl_port_type(const RTLIL::Cell *cell, RTLIL::IdString port) +{ + RTLIL::Module *cell_module = cell->module->design->module(cell->type); + if (cell_module == nullptr || !cell_module->get_bool_attribute(ID(cxxrtl.blackbox))) + return CxxrtlPortType::UNKNOWN; + RTLIL::Wire *cell_output_wire = cell_module->wire(port); + log_assert(cell_output_wire != nullptr); + bool is_comb = cell_output_wire->get_bool_attribute(ID(cxxrtl.comb)); + bool is_sync = cell_output_wire->get_bool_attribute(ID(cxxrtl.sync)); + if (is_comb && is_sync) + log_cmd_error("Port `%s.%s' is marked as both `cxxrtl.comb` and `cxxrtl.sync`.\n", + log_id(cell_module), log_signal(cell_output_wire)); + else if (is_comb) + return CxxrtlPortType::COMB; + else if (is_sync) + return CxxrtlPortType::SYNC; + return CxxrtlPortType::UNKNOWN; +} + +bool is_cxxrtl_comb_port(const RTLIL::Cell *cell, RTLIL::IdString port) +{ + return cxxrtl_port_type(cell, port) == CxxrtlPortType::COMB; +} + +bool is_cxxrtl_sync_port(const RTLIL::Cell *cell, RTLIL::IdString port) +{ + return cxxrtl_port_type(cell, port) == CxxrtlPortType::SYNC; +} + struct FlowGraph { struct Node { enum class Type { CONNECT, - CELL, + CELL_SYNC, + CELL_EVAL, PROCESS }; @@ -225,7 +273,7 @@ struct FlowGraph { }; std::vector<Node*> nodes; - dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_defs, wire_uses; + dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses; dict<const RTLIL::Wire*, bool> wire_def_elidable, wire_use_elidable; ~FlowGraph() @@ -234,13 +282,17 @@ struct FlowGraph { delete node; } - void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable) + void add_defs(Node *node, const RTLIL::SigSpec &sig, bool fully_sync, bool elidable) { for (auto chunk : sig.chunks()) - if (chunk.wire) - wire_defs[chunk.wire].insert(node); - // Only defs of an entire wire in the right order can be elided. - if (sig.is_wire()) + if (chunk.wire) { + if (fully_sync) + wire_sync_defs[chunk.wire].insert(node); + else + wire_comb_defs[chunk.wire].insert(node); + } + // Only comb defs of an entire wire in the right order can be elided. + if (!fully_sync && sig.is_wire()) wire_def_elidable[sig.as_wire()] = elidable; } @@ -268,7 +320,7 @@ struct FlowGraph { // Connections void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn) { - add_defs(node, conn.first, /*elidable=*/true); + add_defs(node, conn.first, /*fully_sync=*/false, /*elidable=*/true); add_uses(node, conn.second); } @@ -283,21 +335,59 @@ struct FlowGraph { } // Cells - void add_cell_defs_uses(Node *node, const RTLIL::Cell *cell) + void add_cell_sync_defs(Node *node, const RTLIL::Cell *cell) + { + // To understand why this node type is necessary and why it produces comb defs, consider a cell + // with input \i and sync output \o, used in a design such that \i is connected to \o. This does + // not result in a feedback arc because the output is synchronous. However, a naive implementation + // of code generation for cells that assigns to inputs, evaluates cells, assigns from outputs + // would not be able to immediately converge... + // + // wire<1> i_tmp; + // cell->p_i = i_tmp.curr; + // cell->eval(); + // i_tmp.next = cell->p_o.curr; + // + // ... since the wire connecting the input and output ports would not be localizable. To solve + // this, the cell is split into two scheduling nodes; one exclusively for sync outputs, and + // another for inputs and all non-sync outputs. This way the generated code can be rearranged... + // + // value<1> i_tmp; + // i_tmp = cell->p_o.curr; + // cell->p_i = i_tmp; + // cell->eval(); + // + // eliminating the unnecessary delta cycle. Conceptually, the CELL_SYNC node type is a series of + // connections of the form `connect \lhs \cell.\sync_output`; the right-hand side of these is not + // as a wire in RTLIL. If it was expressible, then `\cell.\sync_output` would have a sync def, + // and this node would be an ordinary CONNECT node, with `\lhs` having a comb def. Because it isn't, + // a special node type is used, the right-hand side does not appear anywhere, and the left-hand + // side has a comb def. + for (auto conn : cell->connections()) + if (cell->output(conn.first)) + if (is_cxxrtl_sync_port(cell, conn.first)) { + // See note regarding elidability below. + add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); + } + } + + void add_cell_eval_defs_uses(Node *node, const RTLIL::Cell *cell) { - log_assert(cell->known()); for (auto conn : cell->connections()) { if (cell->output(conn.first)) { - if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) - /* non-combinatorial outputs do not introduce defs */; - else if (is_elidable_cell(cell->type)) - add_defs(node, conn.second, /*elidable=*/true); + if (is_elidable_cell(cell->type)) + add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/true); + else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) + add_defs(node, conn.second, /*fully_sync=*/true, /*elidable=*/false); else if (is_internal_cell(cell->type)) - add_defs(node, conn.second, /*elidable=*/false); - else { - // Unlike outputs of internal cells (which generate code that depends on the ability to set the output - // wire bits), outputs of user cells are normal wires, and the wires connected to them can be elided. - add_defs(node, conn.second, /*elidable=*/true); + add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); + else if (!is_cxxrtl_sync_port(cell, conn.first)) { + // Although at first it looks like outputs of user-defined cells may always be elided, the reality is + // more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb + // outputs are assigned in a different way depending on whether the cell's eval() immediately converged. + // Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify + // the infrastructure required to elide outputs of cells with many of them. + add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false); } } if (cell->input(conn.first)) @@ -307,11 +397,27 @@ struct FlowGraph { Node *add_node(const RTLIL::Cell *cell) { + log_assert(cell->known()); + + bool has_fully_sync_outputs = false; + for (auto conn : cell->connections()) + if (cell->output(conn.first) && is_cxxrtl_sync_port(cell, conn.first)) { + has_fully_sync_outputs = true; + break; + } + if (has_fully_sync_outputs) { + Node *node = new Node; + node->type = Node::Type::CELL_SYNC; + node->cell = cell; + nodes.push_back(node); + add_cell_sync_defs(node, cell); + } + Node *node = new Node; - node->type = Node::Type::CELL; + node->type = Node::Type::CELL_EVAL; node->cell = cell; nodes.push_back(node); - add_cell_defs_uses(node, cell); + add_cell_eval_defs_uses(node, cell); return node; } @@ -319,7 +425,7 @@ struct FlowGraph { void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_) { for (auto &action : case_->actions) { - add_defs(node, action.first, /*elidable=*/false); + add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); add_uses(node, action.second); } for (auto sub_switch : case_->switches) { @@ -338,9 +444,9 @@ struct FlowGraph { for (auto sync : process->syncs) for (auto action : sync->actions) { if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe) - /* sync actions do not introduce feedback */; + add_defs(node, action.first, /*is_sync=*/true, /*elidable=*/false); else - add_defs(node, action.first, /*elidable=*/false); + add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); add_uses(node, action.second); } } @@ -356,13 +462,6 @@ struct FlowGraph { } }; -bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell) -{ - RTLIL::Module *cell_module = cell->module->design->module(cell->type); - log_assert(cell_module != nullptr); - return cell_module->get_bool_attribute(ID(cxxrtl.blackbox)); -} - std::vector<std::string> split_by(const std::string &str, const std::string &sep) { std::vector<std::string> result; @@ -414,22 +513,24 @@ struct CxxrtlWorker { bool elide_public = false; bool localize_internal = false; bool localize_public = false; - bool run_splitnets = false; + bool run_opt_clean_purge = false; + bool run_proc_flatten = false; + bool max_opt_level = false; std::ostringstream f; std::string indent; int temporary = 0; dict<const RTLIL::Module*, SigMap> sigmaps; - pool<const RTLIL::Wire*> sync_wires; - dict<RTLIL::SigBit, RTLIL::SyncType> sync_types; + pool<const RTLIL::Wire*> edge_wires; + dict<RTLIL::SigBit, RTLIL::SyncType> edge_types; pool<const RTLIL::Memory*> writable_memories; dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for; - dict<const RTLIL::Cell*, dict<RTLIL::Wire*, RTLIL::IdString>> cell_wire_defs; dict<const RTLIL::Wire*, FlowGraph::Node> elided_wires; dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule; pool<const RTLIL::Wire*> localized_wires; dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations; + dict<const RTLIL::Module*, bool> eval_converges; void inc_indent() { indent += "\t"; @@ -669,18 +770,14 @@ struct CxxrtlWorker { case FlowGraph::Node::Type::CONNECT: dump_connect_elided(node.connect); break; - case FlowGraph::Node::Type::CELL: - if (is_elidable_cell(node.cell->type)) { - dump_cell_elided(node.cell); - } else { - const char *access = is_cxxrtl_blackbox_cell(node.cell) ? "->" : "."; - f << mangle(node.cell) << access << mangle_wire_name(cell_wire_defs[node.cell][chunk.wire]) << ".curr"; - } + case FlowGraph::Node::Type::CELL_EVAL: + log_assert(is_elidable_cell(node.cell->type)); + dump_cell_elided(node.cell); break; default: log_assert(false); } - } else if (localized_wires[chunk.wire]) { + } else if (localized_wires[chunk.wire] || is_input_wire(chunk.wire)) { f << mangle(chunk.wire); } else { f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr"); @@ -740,8 +837,8 @@ struct CxxrtlWorker { case FlowGraph::Node::Type::CONNECT: collect_connect(node.connect, cells); break; - case FlowGraph::Node::Type::CELL: - collect_cell(node.cell, cells); + case FlowGraph::Node::Type::CELL_EVAL: + collect_cell_eval(node.cell, cells); break; default: log_assert(false); @@ -780,6 +877,19 @@ struct CxxrtlWorker { f << ";\n"; } + void dump_cell_sync(const RTLIL::Cell *cell) + { + const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : "."; + f << indent << "// cell " << cell->name.str() << " syncs\n"; + for (auto conn : cell->connections()) + if (cell->output(conn.first)) + if (is_cxxrtl_sync_port(cell, conn.first)) { + f << indent; + dump_sigspec_lhs(conn.second); + f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n"; + } + } + void dump_cell_elided(const RTLIL::Cell *cell) { // Unary cells @@ -833,7 +943,7 @@ struct CxxrtlWorker { elided_wires.count(cell->getPort(ID::Y).as_wire()); } - void collect_cell(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells) + void collect_cell_eval(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells) { if (!is_cell_elided(cell)) return; @@ -844,7 +954,7 @@ struct CxxrtlWorker { collect_sigspec_rhs(port.second, cells); } - void dump_cell(const RTLIL::Cell *cell) + void dump_cell_eval(const RTLIL::Cell *cell) { if (is_cell_elided(cell)) return; @@ -1088,26 +1198,69 @@ struct CxxrtlWorker { log_assert(cell->known()); const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : "."; for (auto conn : cell->connections()) - if (cell->input(conn.first)) { + if (cell->input(conn.first) && !cell->output(conn.first)) { + f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << " = "; + dump_sigspec_rhs(conn.second); + f << ";\n"; + if (getenv("CXXRTL_VOID_MY_WARRANTY")) { + // Until we have proper clock tree detection, this really awful hack that opportunistically + // propagates prev_* values for clocks can be used to estimate how much faster a design could + // be if only one clock edge was simulated by replacing: + // top.p_clk = value<1>{0u}; top.step(); + // top.p_clk = value<1>{1u}; top.step(); + // with: + // top.prev_p_clk = value<1>{0u}; top.p_clk = value<1>{1u}; top.step(); + // Don't rely on this; it will be removed without warning. + RTLIL::Module *cell_module = cell->module->design->module(cell->type); + if (cell_module != nullptr && cell_module->wire(conn.first) && conn.second.is_wire()) { + RTLIL::Wire *cell_module_wire = cell_module->wire(conn.first); + if (edge_wires[conn.second.as_wire()] && edge_wires[cell_module_wire]) { + f << indent << mangle(cell) << access << "prev_" << mangle(cell_module_wire) << " = "; + f << "prev_" << mangle(conn.second.as_wire()) << ";\n"; + } + } + } + } else if (cell->input(conn.first)) { f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << ".next = "; dump_sigspec_rhs(conn.second); f << ";\n"; } - f << indent << mangle(cell) << access << "eval();\n"; - for (auto conn : cell->connections()) { - if (conn.second.is_wire()) { - RTLIL::Wire *wire = conn.second.as_wire(); - if (elided_wires.count(wire) && cell_wire_defs[cell].count(wire)) - continue; - } - if (cell->output(conn.first)) { - if (conn.second.empty()) - continue; // ignore disconnected ports - f << indent; - dump_sigspec_lhs(conn.second); - f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n"; + auto assign_from_outputs = [&](bool cell_converged) { + for (auto conn : cell->connections()) { + if (cell->output(conn.first)) { + if (conn.second.empty()) + continue; // ignore disconnected ports + if (is_cxxrtl_sync_port(cell, conn.first)) + continue; // fully sync ports are handled in CELL_SYNC nodes + f << indent; + dump_sigspec_lhs(conn.second); + f << " = " << mangle(cell) << access << mangle_wire_name(conn.first); + // Similarly to how there is no purpose to buffering cell inputs, there is also no purpose to buffering + // combinatorial cell outputs in case the cell converges within one cycle. (To convince yourself that + // this optimization is valid, consider that, since the cell converged within one cycle, it would not + // have any buffered wires if they were not output ports. Imagine inlining the cell's eval() function, + // and consider the fate of the localized wires that used to be output ports.) + // + // Unlike cell inputs (which are never buffered), it is not possible to know apriori whether the cell + // (which may be late bound) will converge immediately. Because of this, the choice between using .curr + // (appropriate for buffered outputs) and .next (appropriate for unbuffered outputs) is made at runtime. + if (cell_converged && is_cxxrtl_comb_port(cell, conn.first)) + f << ".next;\n"; + else + f << ".curr;\n"; + } } - } + }; + f << indent << "if (" << mangle(cell) << access << "eval()) {\n"; + inc_indent(); + assign_from_outputs(/*cell_converged=*/true); + dec_indent(); + f << indent << "} else {\n"; + inc_indent(); + f << indent << "converged = false;\n"; + assign_from_outputs(/*cell_converged=*/false); + dec_indent(); + f << indent << "}\n"; } } @@ -1253,21 +1406,17 @@ struct CxxrtlWorker { } } - void dump_wire(const RTLIL::Wire *wire, bool is_local) + void dump_wire(const RTLIL::Wire *wire, bool is_local_context) { if (elided_wires.count(wire)) return; + if (localized_wires.count(wire) != is_local_context) + return; - if (is_local) { - if (!localized_wires.count(wire)) - return; - + if (is_local_context) { dump_attrs(wire); f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n"; } else { - if (localized_wires.count(wire)) - return; - std::string width; if (wire->module->has_attribute(ID(cxxrtl.blackbox)) && wire->has_attribute(ID(cxxrtl.width))) { width = wire->get_string_attribute(ID(cxxrtl.width)); @@ -1276,19 +1425,47 @@ struct CxxrtlWorker { } dump_attrs(wire); - f << indent << "wire<" << width << "> " << mangle(wire); + f << indent << (is_input_wire(wire) ? "value" : "wire") << "<" << width << "> " << mangle(wire); if (wire->has_attribute(ID::init)) { f << " "; dump_const_init(wire->attributes.at(ID::init)); } f << ";\n"; - if (sync_wires[wire]) { - for (auto sync_type : sync_types) { - if (sync_type.first.wire == wire) { - if (sync_type.second != RTLIL::STn) - f << indent << "bool posedge_" << mangle(sync_type.first) << " = false;\n"; - if (sync_type.second != RTLIL::STp) - f << indent << "bool negedge_" << mangle(sync_type.first) << " = false;\n"; + if (edge_wires[wire]) { + if (is_input_wire(wire)) { + f << indent << "value<" << width << "> prev_" << mangle(wire); + if (wire->has_attribute(ID::init)) { + f << " "; + dump_const_init(wire->attributes.at(ID::init)); + } + f << ";\n"; + } + for (auto edge_type : edge_types) { + if (edge_type.first.wire == wire) { + std::string prev, next; + if (is_input_wire(wire)) { + prev = "prev_" + mangle(edge_type.first.wire); + next = mangle(edge_type.first.wire); + } else { + prev = mangle(edge_type.first.wire) + ".curr"; + next = mangle(edge_type.first.wire) + ".next"; + } + prev += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()"; + next += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()"; + if (edge_type.second != RTLIL::STn) { + f << indent << "bool posedge_" << mangle(edge_type.first) << "() const {\n"; + inc_indent(); + f << indent << "return !" << prev << " && " << next << ";\n"; + dec_indent(); + f << indent << "}\n"; + } + if (edge_type.second != RTLIL::STp) { + f << indent << "bool negedge_" << mangle(edge_type.first) << "() const {\n"; + inc_indent(); + f << indent << "return " << prev << " && !" << next << ";\n"; + dec_indent(); + f << indent << "}\n"; + } } } } @@ -1343,16 +1520,36 @@ struct CxxrtlWorker { void dump_eval_method(RTLIL::Module *module) { inc_indent(); + f << indent << "bool converged = " << (eval_converges.at(module) ? "true" : "false") << ";\n"; if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) { + for (auto wire : module->wires()) { + if (edge_wires[wire]) { + for (auto edge_type : edge_types) { + if (edge_type.first.wire == wire) { + if (edge_type.second != RTLIL::STn) { + f << indent << "bool posedge_" << mangle(edge_type.first) << " = "; + f << "this->posedge_" << mangle(edge_type.first) << "();\n"; + } + if (edge_type.second != RTLIL::STp) { + f << indent << "bool negedge_" << mangle(edge_type.first) << " = "; + f << "this->negedge_" << mangle(edge_type.first) << "();\n"; + } + } + } + } + } for (auto wire : module->wires()) - dump_wire(wire, /*is_local=*/true); + dump_wire(wire, /*is_local_context=*/true); for (auto node : schedule[module]) { switch (node.type) { case FlowGraph::Node::Type::CONNECT: dump_connect(node.connect); break; - case FlowGraph::Node::Type::CELL: - dump_cell(node.cell); + case FlowGraph::Node::Type::CELL_SYNC: + dump_cell_sync(node.cell); + break; + case FlowGraph::Node::Type::CELL_EVAL: + dump_cell_eval(node.cell); break; case FlowGraph::Node::Type::PROCESS: dump_process(node.process); @@ -1360,14 +1557,7 @@ struct CxxrtlWorker { } } } - for (auto sync_type : sync_types) { - if (sync_type.first.wire->module == module) { - if (sync_type.second != RTLIL::STn) - f << indent << "posedge_" << mangle(sync_type.first) << " = false;\n"; - if (sync_type.second != RTLIL::STp) - f << indent << "negedge_" << mangle(sync_type.first) << " = false;\n"; - } - } + f << indent << "return converged;\n"; dec_indent(); } @@ -1378,39 +1568,13 @@ struct CxxrtlWorker { for (auto wire : module->wires()) { if (elided_wires.count(wire) || localized_wires.count(wire)) continue; - if (sync_wires[wire]) { - std::string wire_prev = mangle(wire) + "_prev"; - std::string wire_curr = mangle(wire) + ".curr"; - std::string wire_edge = mangle(wire) + "_edge"; - f << indent << "value<" << wire->width << "> " << wire_prev << " = " << wire_curr << ";\n"; - f << indent << "if (" << mangle(wire) << ".commit()) {\n"; - inc_indent(); - f << indent << "value<" << wire->width << "> " << wire_edge << " = " - << wire_prev << ".bit_xor(" << wire_curr << ");\n"; - for (auto sync_type : sync_types) { - if (sync_type.first.wire != wire) - continue; - if (sync_type.second != RTLIL::STn) { - f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && " - << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n"; - inc_indent(); - f << indent << "posedge_" << mangle(sync_type.first) << " = true;\n"; - dec_indent(); - } - if (sync_type.second != RTLIL::STp) { - f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && " - << "!" << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n"; - inc_indent(); - f << indent << "negedge_" << mangle(sync_type.first) << " = true;\n"; - dec_indent(); - } - f << indent << "changed = true;\n"; - } - dec_indent(); - f << indent << "}\n"; - } else if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0) { - f << indent << "changed |= " << mangle(wire) << ".commit();\n"; + if (is_input_wire(wire)) { + if (edge_wires[wire]) + f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n"; + continue; } + if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0) + f << indent << "changed |= " << mangle(wire) << ".commit();\n"; } if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) { for (auto memory : module->memories) { @@ -1466,10 +1630,10 @@ struct CxxrtlWorker { inc_indent(); for (auto wire : module->wires()) { if (wire->port_id != 0) - dump_wire(wire, /*is_local=*/false); + dump_wire(wire, /*is_local_context=*/false); } f << "\n"; - f << indent << "void eval() override {\n"; + f << indent << "bool eval() override {\n"; dump_eval_method(module); f << indent << "}\n"; f << "\n"; @@ -1506,7 +1670,7 @@ struct CxxrtlWorker { f << indent << "struct " << mangle(module) << " : public module {\n"; inc_indent(); for (auto wire : module->wires()) - dump_wire(wire, /*is_local=*/false); + dump_wire(wire, /*is_local_context=*/false); f << "\n"; bool has_memories = false; for (auto memory : module->memories) { @@ -1537,7 +1701,7 @@ struct CxxrtlWorker { } if (has_cells) f << "\n"; - f << indent << "void eval() override;\n"; + f << indent << "bool eval() override;\n"; f << indent << "bool commit() override;\n"; dec_indent(); f << indent << "}; // struct " << mangle(module) << "\n"; @@ -1549,7 +1713,7 @@ struct CxxrtlWorker { { if (module->get_bool_attribute(ID(cxxrtl.blackbox))) return; - f << indent << "void " << mangle(module) << "::eval() {\n"; + f << indent << "bool " << mangle(module) << "::eval() {\n"; dump_eval_method(module); f << indent << "}\n"; f << "\n"; @@ -1638,16 +1802,18 @@ struct CxxrtlWorker { log_assert(type == RTLIL::STp || type == RTLIL::STn || type == RTLIL::STe); RTLIL::SigBit sigbit = signal[0]; - if (!sync_types.count(sigbit)) - sync_types[sigbit] = type; - else if (sync_types[sigbit] != type) - sync_types[sigbit] = RTLIL::STe; - sync_wires.insert(signal.as_wire()); + if (!edge_types.count(sigbit)) + edge_types[sigbit] = type; + else if (edge_types[sigbit] != type) + edge_types[sigbit] = RTLIL::STe; + edge_wires.insert(signal.as_wire()); } void analyze_design(RTLIL::Design *design) { bool has_feedback_arcs = false; + bool has_buffered_wires = false; + for (auto module : design->modules()) { if (!design->selected_module(module)) continue; @@ -1680,6 +1846,10 @@ struct CxxrtlWorker { } } } + + // Black boxes converge by default, since their implementations are quite unlikely to require + // internal propagation of comb signals. + eval_converges[module] = true; continue; } @@ -1788,23 +1958,15 @@ struct CxxrtlWorker { if (wire->get_bool_attribute(ID::keep)) continue; if (wire->name.begins_with("$") && !elide_internal) continue; if (wire->name.begins_with("\\") && !elide_public) continue; - if (sync_wires[wire]) continue; - log_assert(flow.wire_defs[wire].size() == 1); - elided_wires[wire] = **flow.wire_defs[wire].begin(); + if (edge_wires[wire]) continue; + log_assert(flow.wire_comb_defs[wire].size() == 1); + elided_wires[wire] = **flow.wire_comb_defs[wire].begin(); } - // Elided wires that are outputs of internal cells are always connected to a well known port (Y). - // For user cells, there could be multiple of them, and we need a way to look up the port name - // knowing only the wire. - for (auto cell : module->cells()) - for (auto conn : cell->connections()) - if (conn.second.is_wire() && elided_wires.count(conn.second.as_wire())) - cell_wire_defs[cell][conn.second.as_wire()] = conn.first; - dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_defs; - for (auto wire_def : flow.wire_defs) - for (auto node : wire_def.second) - node_defs[node].insert(wire_def.first); + for (auto wire_comb_def : flow.wire_comb_defs) + for (auto node : wire_comb_def.second) + node_defs[node].insert(wire_comb_def.first); Scheduler<FlowGraph::Node> scheduler; dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_map; @@ -1843,10 +2005,9 @@ struct CxxrtlWorker { if (!feedback_wires.empty()) { has_feedback_arcs = true; - log("Module `%s' contains feedback arcs through wires:\n", module->name.c_str()); - for (auto wire : feedback_wires) { - log(" %s\n", wire->name.c_str()); - } + log("Module `%s' contains feedback arcs through wires:\n", log_id(module)); + for (auto wire : feedback_wires) + log(" %s\n", log_id(wire)); } for (auto wire : module->wires()) { @@ -1855,14 +2016,46 @@ struct CxxrtlWorker { if (wire->get_bool_attribute(ID::keep)) continue; if (wire->name.begins_with("$") && !localize_internal) continue; if (wire->name.begins_with("\\") && !localize_public) continue; - if (sync_wires[wire]) continue; - // Outputs of FF/$memrd cells and LHS of sync actions do not end up in defs. - if (flow.wire_defs[wire].size() != 1) continue; + if (edge_wires[wire]) continue; + if (flow.wire_sync_defs.count(wire) > 0) continue; localized_wires.insert(wire); } + + // For maximum performance, the state of the simulation (which is the same as the set of its double buffered + // wires, since using a singly buffered wire for any kind of state introduces a race condition) should contain + // no wires attached to combinatorial outputs. Feedback wires, by definition, make that impossible. However, + // it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases + // as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs + // also require more than one delta cycle to converge. + pool<const RTLIL::Wire*> buffered_wires; + for (auto wire : module->wires()) { + if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) { + if (!feedback_wires[wire]) + buffered_wires.insert(wire); + } + } + if (!buffered_wires.empty()) { + has_buffered_wires = true; + log("Module `%s' contains buffered combinatorial wires:\n", log_id(module)); + for (auto wire : buffered_wires) + log(" %s\n", log_id(wire)); + } + + eval_converges[module] = feedback_wires.empty() && buffered_wires.empty(); } - if (has_feedback_arcs) { - log("Feedback arcs require delta cycles during evaluation.\n"); + if (has_feedback_arcs || has_buffered_wires) { + // Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated + // by optimizing the design, if after `opt_clean -purge` there are any feedback wires remaining, it is very + // likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message. + const char *why_pessimistic = nullptr; + if (has_feedback_arcs) + why_pessimistic = "feedback wires"; + else if (has_buffered_wires) + why_pessimistic = "buffered combinatorial wires"; + log("\n"); + log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic); + if (!max_opt_level) + log("Increasing the optimization level may eliminate %s from the design.\n", why_pessimistic); } } @@ -1894,8 +2087,12 @@ struct CxxrtlWorker { void prepare_design(RTLIL::Design *design) { bool has_sync_init, has_packed_mem; + log_push(); check_design(design, has_sync_init, has_packed_mem); - if (has_sync_init) { + if (run_proc_flatten) { + Pass::call(design, "proc"); + Pass::call(design, "flatten"); + } else if (has_sync_init) { // We're only interested in proc_init, but it depends on proc_prune and proc_clean, so call those // in case they weren't already. (This allows `yosys foo.v -o foo.cc` to work.) Pass::call(design, "proc_prune"); @@ -1908,18 +2105,15 @@ struct CxxrtlWorker { if (has_sync_init || has_packed_mem) check_design(design, has_sync_init, has_packed_mem); log_assert(!(has_sync_init || has_packed_mem)); - - if (run_splitnets) { - Pass::call(design, "splitnets -driver"); + if (run_opt_clean_purge) Pass::call(design, "opt_clean -purge"); - } - log("\n"); + log_pop(); analyze_design(design); } }; struct CxxrtlBackend : public Backend { - static const int DEFAULT_OPT_LEVEL = 5; + static const int DEFAULT_OPT_LEVEL = 6; CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { } void help() YS_OVERRIDE @@ -1941,9 +2135,9 @@ struct CxxrtlBackend : public Backend { log(" top.step();\n"); log(" while (1) {\n"); log(" /* user logic */\n"); - log(" top.p_clk.next = value<1> {0u};\n"); + log(" top.p_clk = value<1> {0u};\n"); log(" top.step();\n"); - log(" top.p_clk.next = value<1> {1u};\n"); + log(" top.p_clk = value<1> {1u};\n"); log(" top.step();\n"); log(" }\n"); log(" }\n"); @@ -1965,18 +2159,20 @@ struct CxxrtlBackend : public Backend { log(" module debug(...);\n"); log(" (* cxxrtl.edge = \"p\" *) input clk;\n"); log(" input en;\n"); - log(" input [7:0] data;\n"); + log(" input [7:0] i_data;\n"); + log(" (* cxxrtl.sync *) output [7:0] o_data;\n"); log(" endmodule\n"); log("\n"); log("For this HDL interface, this backend will generate the following C++ interface:\n"); log("\n"); log(" struct bb_p_debug : public module {\n"); - log(" wire<1> p_clk;\n"); - log(" bool posedge_p_clk = false;\n"); - log(" wire<1> p_en;\n"); - log(" wire<8> p_data;\n"); + log(" value<1> p_clk;\n"); + log(" bool posedge_p_clk() const { /* ... */ }\n"); + log(" value<1> p_en;\n"); + log(" value<8> p_i_data;\n"); + log(" wire<8> p_o_data;\n"); log("\n"); - log(" void eval() override;\n"); + log(" bool eval() override;\n"); log(" bool commit() override;\n"); log("\n"); log(" static std::unique_ptr<bb_p_debug>\n"); @@ -1989,10 +2185,11 @@ struct CxxrtlBackend : public Backend { log(" namespace cxxrtl_design {\n"); log("\n"); log(" struct stderr_debug : public bb_p_debug {\n"); - log(" void eval() override {\n"); - log(" if (posedge_p_clk && p_en.curr)\n"); - log(" fprintf(stderr, \"debug: %%02x\\n\", p_data.curr.data[0]);\n"); - log(" bb_p_debug::eval();\n"); + log(" bool eval() override {\n"); + log(" if (posedge_p_clk() && p_en)\n"); + log(" fprintf(stderr, \"debug: %%02x\\n\", p_i_data.data[0]);\n"); + log(" p_o_data.next = p_i_data;\n"); + log(" return bb_p_debug::eval();\n"); log(" }\n"); log(" };\n"); log("\n"); @@ -2013,7 +2210,8 @@ struct CxxrtlBackend : public Backend { log(" parameter WIDTH = 8;\n"); log(" (* cxxrtl.edge = \"p\" *) input clk;\n"); log(" input en;\n"); - log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] data;\n"); + log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] i_data;\n"); + log(" (* cxxrtl.width = \"WIDTH\" *) output [WIDTH - 1:0] o_data;\n"); log(" endmodule\n"); log("\n"); log("For this parametric HDL interface, this backend will generate the following C++\n"); @@ -2022,7 +2220,8 @@ struct CxxrtlBackend : public Backend { log(" template<size_t WIDTH>\n"); log(" struct bb_p_debug : public module {\n"); log(" // ...\n"); - log(" wire<WIDTH> p_data;\n"); + log(" value<WIDTH> p_i_data;\n"); + log(" wire<WIDTH> p_o_data;\n"); log(" // ...\n"); log(" static std::unique_ptr<bb_p_debug<WIDTH>>\n"); log(" create(std::string name, metadata_map parameters, metadata_map attributes);\n"); @@ -2053,10 +2252,9 @@ struct CxxrtlBackend : public Backend { log("\n"); log(" cxxrtl.edge\n"); log(" only valid on inputs of black boxes. must be one of \"p\", \"n\", \"a\".\n"); - log(" if specified on signal `clk`, the generated code includes boolean fields\n"); - log(" `posedge_p_clk` (if \"p\"), `negedge_p_clk` (if \"n\"), or both (if \"a\"),\n"); - log(" as well as edge detection logic, simplifying implementation of clocked\n"); - log(" black boxes.\n"); + log(" if specified on signal `clk`, the generated code includes edge detectors\n"); + log(" `posedge_p_clk()` (if \"p\"), `negedge_p_clk()` (if \"n\"), or both (if\n"); + log(" \"a\"), simplifying implementation of clocked black boxes.\n"); log("\n"); log(" cxxrtl.template\n"); log(" only valid on black boxes. must contain a space separated sequence of\n"); @@ -2067,6 +2265,13 @@ struct CxxrtlBackend : public Backend { log(" only valid on ports of black boxes. must be a constant expression, which\n"); log(" is directly inserted into generated code.\n"); log("\n"); + log(" cxxrtl.comb, cxxrtl.sync\n"); + log(" only valid on outputs of black boxes. if specified, indicates that every\n"); + log(" bit of the output port is driven, correspondingly, by combinatorial or\n"); + log(" synchronous logic. this knowledge is used for scheduling optimizations.\n"); + log(" if neither is specified, the output will be pessimistically treated as\n"); + log(" driven by both combinatorial and synchronous logic.\n"); + log("\n"); log("The following options are supported by this backend:\n"); log("\n"); log(" -header\n"); @@ -2100,7 +2305,10 @@ struct CxxrtlBackend : public Backend { log(" like -O3, and localize public wires not marked (*keep*) if possible.\n"); log("\n"); log(" -O5\n"); - log(" like -O4, and run `splitnets -driver; opt_clean -purge` first.\n"); + log(" like -O4, and run `opt_clean -purge` first.\n"); + log("\n"); + log(" -O6\n"); + log(" like -O5, and run `proc; flatten` first.\n"); log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE @@ -2134,8 +2342,11 @@ struct CxxrtlBackend : public Backend { extra_args(f, filename, args, argidx); switch (opt_level) { + case 6: + worker.max_opt_level = true; + worker.run_proc_flatten = true; case 5: - worker.run_splitnets = true; + worker.run_opt_clean_purge = true; case 4: worker.localize_public = true; case 3: |