diff options
Diffstat (limited to 'passes')
30 files changed, 2509 insertions, 144 deletions
diff --git a/passes/cmds/Makefile.inc b/passes/cmds/Makefile.inc index 44a83b2b9..c8067a8be 100644 --- a/passes/cmds/Makefile.inc +++ b/passes/cmds/Makefile.inc @@ -29,4 +29,4 @@ OBJS += passes/cmds/chformal.o OBJS += passes/cmds/chtype.o OBJS += passes/cmds/blackbox.o OBJS += passes/cmds/ltp.o - +OBJS += passes/cmds/bugpoint.o diff --git a/passes/cmds/bugpoint.cc b/passes/cmds/bugpoint.cc new file mode 100644 index 000000000..606276e64 --- /dev/null +++ b/passes/cmds/bugpoint.cc @@ -0,0 +1,369 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2018 whitequark <whitequark@whitequark.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "backends/ilang/ilang_backend.h" + +USING_YOSYS_NAMESPACE +using namespace ILANG_BACKEND; +PRIVATE_NAMESPACE_BEGIN + +struct BugpointPass : public Pass { + BugpointPass() : Pass("bugpoint", "minimize testcases") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" bugpoint [options]\n"); + log("\n"); + log("This command minimizes testcases that crash Yosys. 
It removes an arbitrary part\n"); + log("of the design and recursively invokes Yosys with a given script, repeating these\n"); + log("steps while it can find a smaller design that still causes a crash. Once this\n"); + log("command finishes, it replaces the current design with the smallest testcase it\n"); + log("was able to produce.\n"); + log("\n"); + log("It is possible to specify the kinds of design part that will be removed. If none\n"); + log("are specified, all parts of design will be removed.\n"); + log("\n"); + log(" -yosys <filename>\n"); + log(" use this Yosys binary. if not specified, `yosys` is used.\n"); + log("\n"); + log(" -script <filename>\n"); + log(" use this script to crash Yosys. required.\n"); + log("\n"); + log(" -grep <string>\n"); + log(" only consider crashes that place this string in the log file.\n"); + log("\n"); + log(" -fast\n"); + log(" run `clean -purge` after each minimization step. converges faster, but\n"); + log(" produces larger testcases, and may fail to produce any testcase at all if\n"); + log(" the crash is related to dangling wires.\n"); + log("\n"); + log(" -clean\n"); + log(" run `clean -purge` before checking testcase and after finishing. 
produces\n"); + log(" smaller and more useful testcases, but may fail to produce any testcase\n"); + log(" at all if the crash is related to dangling wires.\n"); + log("\n"); + log(" -modules\n"); + log(" try to remove modules.\n"); + log("\n"); + log(" -ports\n"); + log(" try to remove module ports.\n"); + log("\n"); + log(" -cells\n"); + log(" try to remove cells.\n"); + log("\n"); + log(" -connections\n"); + log(" try to reconnect ports to 'x.\n"); + log("\n"); + } + + bool run_yosys(RTLIL::Design *design, string yosys_cmd, string script) + { + design->sort(); + + std::ofstream f("bugpoint-case.il"); + ILANG_BACKEND::dump_design(f, design, /*only_selected=*/false, /*flag_m=*/true, /*flag_n=*/false); + f.close(); + + string yosys_cmdline = stringf("%s -qq -L bugpoint-case.log -s %s bugpoint-case.il", yosys_cmd.c_str(), script.c_str()); + return run_command(yosys_cmdline) == 0; + } + + /* Returns true when grep is empty or when some line of bugpoint-case.log contains it. */ + bool check_logfile(string grep) + { + if (grep.empty()) + return true; + + std::ifstream f("bugpoint-case.log"); + string line; + /* Loop on the read itself: a log file that is missing or unreadable never sets eofbit, so testing eof() would spin forever. */ + while (getline(f, line)) + { + if (line.find(grep) != std::string::npos) + return true; + } + return false; + } + + RTLIL::Design *clean_design(RTLIL::Design *design, bool do_clean = true, bool do_delete = false) + { + if (!do_clean) + return design; + + RTLIL::Design *design_copy = new RTLIL::Design; + for (auto &it : design->modules_) + design_copy->add(it.second->clone()); + Pass::call(design_copy, "clean -purge"); + + if (do_delete) + delete design; + return design_copy; + } + + RTLIL::Design *simplify_something(RTLIL::Design *design, int &seed, bool stage2, bool modules, bool ports, bool cells, bool connections) + { + RTLIL::Design *design_copy = new RTLIL::Design; + for (auto &it : design->modules_) + design_copy->add(it.second->clone()); + + int index = 0; + if (modules) + { + for (auto &it : design_copy->modules_) + { + if (it.second->get_bool_attribute("\\blackbox")) + continue; + + if (index++ == seed) + { + log("Trying to remove 
module %s.\n", it.first.c_str()); + design_copy->remove(it.second); + return design_copy; + } + } + } + if (ports) + { + for (auto mod : design_copy->modules()) + { + if (mod->get_bool_attribute("\\blackbox")) + continue; + + for (auto wire : mod->wires()) + { + if (!stage2 && wire->get_bool_attribute("$bugpoint")) + continue; + + if (wire->port_input || wire->port_output) + { + if (index++ == seed) + { + log("Trying to remove module port %s.\n", log_signal(wire)); + wire->port_input = wire->port_output = false; + mod->fixup_ports(); + return design_copy; + } + } + } + } + } + if (cells) + { + for (auto mod : design_copy->modules()) + { + if (mod->get_bool_attribute("\\blackbox")) + continue; + + for (auto &it : mod->cells_) + { + if (index++ == seed) + { + log("Trying to remove cell %s.%s.\n", mod->name.c_str(), it.first.c_str()); + mod->remove(it.second); + return design_copy; + } + } + } + } + if (connections) + { + for (auto mod : design_copy->modules()) + { + if (mod->get_bool_attribute("\\blackbox")) + continue; + + for (auto cell : mod->cells()) + { + for (auto it : cell->connections_) + { + RTLIL::SigSpec port = cell->getPort(it.first); + bool is_undef = port.is_fully_undef(); + bool is_port = port.is_wire() && (port.as_wire()->port_input || port.as_wire()->port_output); + + if(is_undef || (!stage2 && is_port)) + continue; + + if (index++ == seed) + { + log("Trying to remove cell port %s.%s.%s.\n", mod->name.c_str(), cell->name.c_str(), it.first.c_str()); + RTLIL::SigSpec port_x(State::Sx, port.size()); + cell->unsetPort(it.first); + cell->setPort(it.first, port_x); + return design_copy; + } + + if (!stage2 && (cell->input(it.first) || cell->output(it.first)) && index++ == seed) + { + log("Trying to expose cell port %s.%s.%s as module port.\n", mod->name.c_str(), cell->name.c_str(), it.first.c_str()); + RTLIL::Wire *wire = mod->addWire(NEW_ID, port.size()); + wire->set_bool_attribute("$bugpoint"); + wire->port_input = cell->input(it.first); + 
wire->port_output = cell->output(it.first); + cell->unsetPort(it.first); + cell->setPort(it.first, wire); + mod->fixup_ports(); + return design_copy; + } + } + } + } + } + /* No candidate with this seed: release the unused copy instead of leaking one design per exhausted pass. */ + delete design_copy; + return NULL; + } + + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + string yosys_cmd = "yosys", script, grep; + bool fast = false, clean = false; + bool modules = false, ports = false, cells = false, connections = false, has_part = false; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + if (args[argidx] == "-yosys" && argidx + 1 < args.size()) { + yosys_cmd = args[++argidx]; + continue; + } + if (args[argidx] == "-script" && argidx + 1 < args.size()) { + script = args[++argidx]; + continue; + } + if (args[argidx] == "-grep" && argidx + 1 < args.size()) { + grep = args[++argidx]; + continue; + } + if (args[argidx] == "-fast") { + fast = true; + continue; + } + if (args[argidx] == "-clean") { + clean = true; + continue; + } + if (args[argidx] == "-modules") { + modules = true; + has_part = true; + continue; + } + if (args[argidx] == "-ports") { + ports = true; + has_part = true; + continue; + } + if (args[argidx] == "-cells") { + cells = true; + has_part = true; + continue; + } + if (args[argidx] == "-connections") { + connections = true; + has_part = true; + continue; + } + break; + } + extra_args(args, argidx, design); + + /* The help text documents -script as required; reject a missing one before it reaches the command line. */ + if (script.empty()) + log_cmd_error("Missing -script option.\n"); + + if (!has_part) + { + modules = true; + ports = true; + cells = true; + connections = true; + } + + if (!design->full_selection()) + log_cmd_error("This command only operates on fully selected designs!\n"); + + RTLIL::Design *crashing_design = clean_design(design, clean); + if (run_yosys(crashing_design, yosys_cmd, script)) + log_cmd_error("The provided script file and Yosys binary do not crash on this design!\n"); + if (!check_logfile(grep)) + log_cmd_error("The provided grep string is not found in the log file!\n"); + + int seed = 0, crashing_seed = seed; + bool found_something = false, stage2 = false; + while 
(true) + { + if (RTLIL::Design *simplified = simplify_something(crashing_design, seed, stage2, modules, ports, cells, connections)) + { + simplified = clean_design(simplified, fast, /*do_delete=*/true); + + bool crashes; + if (clean) + { + RTLIL::Design *testcase = clean_design(simplified); + crashes = !run_yosys(testcase, yosys_cmd, script); + delete testcase; + } + else + { + crashes = !run_yosys(simplified, yosys_cmd, script); + } + + if (crashes && check_logfile(grep)) + { + log("Testcase crashes.\n"); + if (crashing_design != design) + delete crashing_design; + crashing_design = simplified; + crashing_seed = seed; + found_something = true; + } + else + { + log("Testcase does not crash.\n"); + delete simplified; + seed++; + } + } + else + { + seed = 0; + if (found_something) + found_something = false; + else + { + if (!stage2) + { + log("Demoting introduced module ports.\n"); + stage2 = true; + } + else + { + log("Simplifications exhausted.\n"); + break; + } + } + } + } + + if (crashing_design != design) + { + Pass::call(design, "design -reset"); + crashing_design = clean_design(crashing_design, clean, /*do_delete=*/true); + for (auto &it : crashing_design->modules_) + design->add(it.second->clone()); + delete crashing_design; + } + } +} BugpointPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/cmds/chformal.cc b/passes/cmds/chformal.cc index 522758eae..7e32da65f 100644 --- a/passes/cmds/chformal.cc +++ b/passes/cmds/chformal.cc @@ -32,7 +32,7 @@ struct ChformalPass : public Pass { log(" chformal [types] [mode] [options] [selection]\n"); log("\n"); log("Make changes to the formal constraints of the design. The [types] options\n"); - log("the type of constraint to operate on. If none of the folling options is given,\n"); + log("the type of constraint to operate on. If none of the following options are given,\n"); log("the command will operate on all constraint types:\n"); log("\n"); log(" -assert $assert cells, representing assert(...) 
constraints\n"); @@ -59,7 +59,7 @@ struct ChformalPass : public Pass { log(" -assume2assert\n"); log(" -live2fair\n"); log(" -fair2live\n"); - log(" change the roles of cells as indicated. this options can be combined\n"); + log(" change the roles of cells as indicated. these options can be combined\n"); log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE diff --git a/passes/cmds/connect.cc b/passes/cmds/connect.cc index d480b79ac..f93bada27 100644 --- a/passes/cmds/connect.cc +++ b/passes/cmds/connect.cc @@ -137,7 +137,7 @@ struct ConnectPass : public Pass { if (!set_lhs.empty()) { if (!unset_expr.empty() || !port_cell.empty()) - log_cmd_error("Cant use -set together with -unset and/or -port.\n"); + log_cmd_error("Can't use -set together with -unset and/or -port.\n"); RTLIL::SigSpec sig_lhs, sig_rhs; if (!RTLIL::SigSpec::parse_sel(sig_lhs, design, module, set_lhs)) @@ -157,7 +157,7 @@ struct ConnectPass : public Pass { if (!unset_expr.empty()) { if (!port_cell.empty() || flag_nounset) - log_cmd_error("Cant use -unset together with -port and/or -nounset.\n"); + log_cmd_error("Can't use -unset together with -port and/or -nounset.\n"); RTLIL::SigSpec sig; if (!RTLIL::SigSpec::parse_sel(sig, design, module, unset_expr)) @@ -170,7 +170,7 @@ struct ConnectPass : public Pass { if (!port_cell.empty()) { if (flag_nounset) - log_cmd_error("Cant use -port together with -nounset.\n"); + log_cmd_error("Can't use -port together with -nounset.\n"); if (module->cells_.count(RTLIL::escape_id(port_cell)) == 0) log_cmd_error("Can't find cell %s.\n", port_cell.c_str()); diff --git a/passes/cmds/rename.cc b/passes/cmds/rename.cc index 4b4af0a40..698ce7235 100644 --- a/passes/cmds/rename.cc +++ b/passes/cmds/rename.cc @@ -61,6 +61,42 @@ static std::string derive_name_from_src(const std::string &src, int counter) return stringf("\\%s$%d", src_base.c_str(), counter); } +static IdString derive_name_from_wire(const RTLIL::Cell &cell) +{ + // Find 
output + const SigSpec *output = nullptr; + int num_outputs = 0; + for (auto &connection : cell.connections()) { + if (cell.output(connection.first)) { + output = &connection.second; + num_outputs++; + } + } + + if (num_outputs != 1) // Skip cells that do not drive exactly one output + return cell.name; + + std::string name = ""; + for (auto &chunk : output->chunks()) { + // Skip cells that drive privately named wires + if (!chunk.wire || chunk.wire->name.str()[0] == '$') + return cell.name; + + if (name != "") + name += "$"; + + name += chunk.wire->name.str(); + if (chunk.wire->width != chunk.width) { + name += "["; + if (chunk.width != 1) + /* The upper index of a slice is inclusive: offset + width - 1. */ + name += std::to_string(chunk.offset + chunk.width - 1) + ":"; + name += std::to_string(chunk.offset) + "]"; + } + } + + return name + cell.type.str(); +} + struct RenamePass : public Pass { RenamePass() : Pass("rename", "rename object in the design") { } void help() YS_OVERRIDE @@ -77,6 +113,10 @@ struct RenamePass : public Pass { log("Assign names auto-generated from the src attribute to all selected wires and\n"); log("cells with private names.\n"); log("\n"); + log(" rename -wire [selection]\n"); + log("Assign auto-generated names based on the wires they drive to all selected\n"); + log("cells with private names. 
Ignores cells driving privatly named wires.\n"); + log("\n"); log(" rename -enumerate [-pattern <pattern>] [selection]\n"); log("\n"); log("Assign short auto-generated names to all selected wires and cells with private\n"); @@ -98,6 +138,7 @@ struct RenamePass : public Pass { { std::string pattern_prefix = "_", pattern_suffix = "_"; bool flag_src = false; + bool flag_wire = false; bool flag_enumerate = false; bool flag_hide = false; bool flag_top = false; @@ -112,6 +153,11 @@ struct RenamePass : public Pass { got_mode = true; continue; } + if (arg == "-wire" && !got_mode) { + flag_wire = true; + got_mode = true; + continue; + } if (arg == "-enumerate" && !got_mode) { flag_enumerate = true; got_mode = true; @@ -167,6 +213,26 @@ struct RenamePass : public Pass { } } else + if (flag_wire) + { + extra_args(args, argidx, design); + + for (auto &mod : design->modules_) + { + RTLIL::Module *module = mod.second; + if (!design->selected(module)) + continue; + + dict<RTLIL::IdString, RTLIL::Cell*> new_cells; + for (auto &it : module->cells_) { + if (it.first[0] == '$' && design->selected(module, it.second)) + it.second->name = derive_name_from_wire(*it.second); + new_cells[it.second->name] = it.second; + } + module->cells_.swap(new_cells); + } + } + else if (flag_enumerate) { extra_args(args, argidx, design); diff --git a/passes/cmds/select.cc b/passes/cmds/select.cc index ba407ea8c..b5e8ef1af 100644 --- a/passes/cmds/select.cc +++ b/passes/cmds/select.cc @@ -987,7 +987,7 @@ struct SelectPass : public Pass { log("list of selected objects.\n"); log("\n"); log("Note that many commands support an optional [selection] argument that can be\n"); - log("used to YS_OVERRIDE the global selection for the command. The syntax of this\n"); + log("used to override the global selection for the command. 
The syntax of this\n"); log("optional argument is identical to the syntax of the <selection> argument\n"); log("described here.\n"); log("\n"); diff --git a/passes/cmds/setundef.cc b/passes/cmds/setundef.cc index a1dfa9b5c..56ef2d125 100644 --- a/passes/cmds/setundef.cc +++ b/passes/cmds/setundef.cc @@ -137,7 +137,7 @@ struct SetundefPass : public Pass { log(" replace with $anyconst drivers (for formal)\n"); log("\n"); log(" -random <seed>\n"); - log(" replace with random bits using the specified integer als seed\n"); + log(" replace with random bits using the specified integer as seed\n"); log(" value for the random number generator.\n"); log("\n"); log(" -init\n"); diff --git a/passes/cmds/show.cc b/passes/cmds/show.cc index a48873244..58acd302d 100644 --- a/passes/cmds/show.cc +++ b/passes/cmds/show.cc @@ -623,7 +623,7 @@ struct ShowPass : public Pass { log(" assigned to each unique value of this attribute.\n"); log("\n"); log(" -width\n"); - log(" annotate busses with a label indicating the width of the bus.\n"); + log(" annotate buses with a label indicating the width of the bus.\n"); log("\n"); log(" -signed\n"); log(" mark ports (A, B) that are declared as signed (using the [AB]_SIGNED\n"); diff --git a/passes/cmds/tee.cc b/passes/cmds/tee.cc index ff80f3859..ee96ace86 100644 --- a/passes/cmds/tee.cc +++ b/passes/cmds/tee.cc @@ -37,7 +37,7 @@ struct TeePass : public Pass { log("specified logfile(s).\n"); log("\n"); log(" -q\n"); - log(" Do not print output to the normal destination (console and/or log file)\n"); + log(" Do not print output to the normal destination (console and/or log file).\n"); log("\n"); log(" -o logfile\n"); log(" Write output to this file, truncate if exists.\n"); @@ -46,7 +46,7 @@ struct TeePass : public Pass { log(" Write output to this file, append if exists.\n"); log("\n"); log(" +INT, -INT\n"); - log(" Add/subract INT from the -v setting for this command.\n"); + log(" Add/subtract INT from the -v setting for this command.\n"); 
log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE diff --git a/passes/fsm/fsm_detect.cc b/passes/fsm/fsm_detect.cc index fc504e98c..5ae991b28 100644 --- a/passes/fsm/fsm_detect.cc +++ b/passes/fsm/fsm_detect.cc @@ -196,13 +196,13 @@ static void detect_fsm(RTLIL::Wire *wire) vector<string> warnings; if (is_module_port) - warnings.push_back("Forcing fsm recoding on module port might result in larger circuit.\n"); + warnings.push_back("Forcing FSM recoding on module port might result in larger circuit.\n"); if (!looks_like_good_state_reg) - warnings.push_back("Users of state reg look like fsm recoding might result in larger circuit.\n"); + warnings.push_back("Users of state reg look like FSM recoding might result in larger circuit.\n"); if (has_init_attr) - warnings.push_back("Init value on fsm state registers are ignored. Possible simulation-synthesis mismatch!"); + warnings.push_back("Initialization value on FSM state register is ignored. Possible simulation-synthesis mismatch!\n"); if (!looks_like_state_reg) warnings.push_back("Doesn't look like a proper FSM. 
Possible simulation-synthesis mismatch!\n"); @@ -236,7 +236,7 @@ static void detect_fsm(RTLIL::Wire *wire) log(" Users of register don't seem to benefit from recoding.\n"); if (has_init_attr) - log(" Register has an initialization value."); + log(" Register has an initialization value.\n"); if (is_self_resetting) log(" Circuit seems to be self-resetting.\n"); diff --git a/passes/fsm/fsm_extract.cc b/passes/fsm/fsm_extract.cc index 67551f673..6095eaf30 100644 --- a/passes/fsm/fsm_extract.cc +++ b/passes/fsm/fsm_extract.cc @@ -178,7 +178,7 @@ undef_bit_in_next_state: log_state_in = fsm_data.state_table.at(state_in); if (states.count(ce.values_map(ce.assign_map(dff_in)).as_const()) == 0) { - log(" transition: %10s %s -> INVALID_STATE(%s) %s <ignored invalid transistion!>%s\n", + log(" transition: %10s %s -> INVALID_STATE(%s) %s <ignored invalid transition!>%s\n", log_signal(log_state_in), log_signal(tr.ctrl_in), log_signal(ce.values_map(ce.assign_map(dff_in))), log_signal(tr.ctrl_out), undef_bit_in_next_state_mode ? 
" SHORTENED" : ""); @@ -194,7 +194,7 @@ undef_bit_in_next_state: log_signal(log_state_in), log_signal(tr.ctrl_in), log_signal(fsm_data.state_table[tr.state_out]), log_signal(tr.ctrl_out)); } else { - log(" transition: %10s %s -> %10s %s <ignored undef transistion!>\n", + log(" transition: %10s %s -> %10s %s <ignored undef transition!>\n", log_signal(log_state_in), log_signal(tr.ctrl_in), log_signal(fsm_data.state_table[tr.state_out]), log_signal(tr.ctrl_out)); } diff --git a/passes/hierarchy/hierarchy.cc b/passes/hierarchy/hierarchy.cc index 0c782b8ab..0e28dbca2 100644 --- a/passes/hierarchy/hierarchy.cc +++ b/passes/hierarchy/hierarchy.cc @@ -543,7 +543,7 @@ struct HierarchyPass : public Pass { log(" an unknown module is used as cell type.\n"); log("\n"); log(" -simcheck\n"); - log(" like -check, but also thow an error if blackbox modules are\n"); + log(" like -check, but also throw an error if blackbox modules are\n"); log(" instantiated, and throw an error if the design has no top module\n"); log("\n"); log(" -purge_lib\n"); diff --git a/passes/memory/memory_collect.cc b/passes/memory/memory_collect.cc index 70d98713c..369fcc84e 100644 --- a/passes/memory/memory_collect.cc +++ b/passes/memory/memory_collect.cc @@ -184,9 +184,6 @@ Cell *handle_memory(Module *module, RTLIL::Memory *memory) mem->parameters["\\OFFSET"] = Const(memory->start_offset); mem->parameters["\\SIZE"] = Const(memory->size); mem->parameters["\\ABITS"] = Const(addr_bits); - - while (GetSize(init_data) > 1 && init_data.bits.back() == State::Sx && init_data.bits[GetSize(init_data)-2] == State::Sx) - init_data.bits.pop_back(); mem->parameters["\\INIT"] = init_data; log_assert(sig_wr_clk.size() == wr_ports); diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc index 0f596b1f4..c3e0a2a40 100644 --- a/passes/opt/Makefile.inc +++ b/passes/opt/Makefile.inc @@ -6,12 +6,12 @@ OBJS += passes/opt/opt_reduce.o OBJS += passes/opt/opt_rmdff.o OBJS += passes/opt/opt_clean.o OBJS += 
passes/opt/opt_expr.o -OBJS += passes/opt/opt_lut.o ifneq ($(SMALL),1) OBJS += passes/opt/share.o OBJS += passes/opt/wreduce.o OBJS += passes/opt/opt_demorgan.o OBJS += passes/opt/rmports.o +OBJS += passes/opt/opt_lut.o endif diff --git a/passes/opt/opt_expr.cc b/passes/opt/opt_expr.cc index 610edc5e9..26a3ca7bc 100644 --- a/passes/opt/opt_expr.cc +++ b/passes/opt/opt_expr.cc @@ -259,6 +259,22 @@ bool is_one_or_minus_one(const Const &value, bool is_signed, bool &is_negative) return last_bit_one; } +int get_highest_hot_index(RTLIL::SigSpec signal) +{ + for (int i = GetSize(signal) - 1; i >= 0; i--) + { + if (signal[i] == RTLIL::State::S0) + continue; + + if (signal[i] == RTLIL::State::S1) + return i; + + break; + } + + return -1; +} + // if the signal has only one bit set, return the index of that bit. // otherwise return -1 int get_onehot_bit_index(RTLIL::SigSpec signal) @@ -1344,118 +1360,139 @@ void replace_const_cells(RTLIL::Design *design, RTLIL::Module *module, bool cons } } - // replace a<0 or a>=0 with the top bit of a + // simplify comparisons if (do_fine && (cell->type == "$lt" || cell->type == "$ge" || cell->type == "$gt" || cell->type == "$le")) { - //used to decide whether the signal needs to be negated - bool is_lt = false; - - //references the variable signal in the comparison - RTLIL::SigSpec sigVar; - - //references the constant signal in the comparison - RTLIL::SigSpec sigConst; - - // note that this signal must be constant for the optimization - // to take place, but it is not checked beforehand. - // If new passes are added, this signal must be checked for const-ness - - //width of the variable port - int width; - int const_width; - - bool var_signed; - - if (cell->type == "$lt" || cell->type == "$ge") { - is_lt = cell->type == "$lt" ? 
1 : 0; - sigVar = cell->getPort("\\A"); - sigConst = cell->getPort("\\B"); - width = cell->parameters["\\A_WIDTH"].as_int(); - const_width = cell->parameters["\\B_WIDTH"].as_int(); - var_signed = cell->parameters["\\A_SIGNED"].as_bool(); - } else - if (cell->type == "$gt" || cell->type == "$le") { - is_lt = cell->type == "$gt" ? 1 : 0; - sigVar = cell->getPort("\\B"); - sigConst = cell->getPort("\\A"); - width = cell->parameters["\\B_WIDTH"].as_int(); - const_width = cell->parameters["\\A_WIDTH"].as_int(); - var_signed = cell->parameters["\\B_SIGNED"].as_bool(); - } else - log_abort(); + IdString cmp_type = cell->type; + SigSpec var_sig = cell->getPort("\\A"); + SigSpec const_sig = cell->getPort("\\B"); + int var_width = cell->parameters["\\A_WIDTH"].as_int(); + int const_width = cell->parameters["\\B_WIDTH"].as_int(); + bool is_signed = cell->getParam("\\A_SIGNED").as_bool(); - // replace a(signed) < 0 with the high bit of a - if (sigConst.is_fully_const() && sigConst.is_fully_zero() && var_signed == true) + if (!const_sig.is_fully_const()) { - RTLIL::SigSpec a_prime(RTLIL::State::S0, cell->parameters["\\Y_WIDTH"].as_int()); - a_prime[0] = sigVar[width - 1]; - if (is_lt) { - log("Replacing %s cell `%s' (implementing X<0) with X[%d]: %s\n", - log_id(cell->type), log_id(cell), width-1, log_signal(a_prime)); - module->connect(cell->getPort("\\Y"), a_prime); - module->remove(cell); - } else { - log("Replacing %s cell `%s' (implementing X>=0) with ~X[%d]: %s\n", - log_id(cell->type), log_id(cell), width-1, log_signal(a_prime)); - module->addNot(NEW_ID, a_prime, cell->getPort("\\Y")); - module->remove(cell); - } - did_something = true; - goto next_cell; - } else - if (sigConst.is_fully_const() && sigConst.is_fully_def() && var_signed == false) + std::swap(var_sig, const_sig); + std::swap(var_width, const_width); + if (cmp_type == "$gt") + cmp_type = "$lt"; + else if (cmp_type == "$lt") + cmp_type = "$gt"; + else if (cmp_type == "$ge") + cmp_type = "$le"; + else if 
(cmp_type == "$le") + cmp_type = "$ge"; + } + + if (const_sig.is_fully_def() && const_sig.is_fully_const()) { - if (sigConst.is_fully_zero()) { - RTLIL::SigSpec a_prime(RTLIL::State::S0, GetSize(cell->getPort("\\Y"))); - if (is_lt) { - log("Replacing %s cell `%s' (implementing unsigned X<0) with constant false.\n", - log_id(cell->type), log_id(cell)); - a_prime[0] = RTLIL::State::S0; - } else { - log("Replacing %s cell `%s' (implementing unsigned X>=0) with constant true.\n", - log_id(cell->type), log_id(cell)); - a_prime[0] = RTLIL::State::S1; + std::string condition, replacement; + SigSpec replace_sig(State::S0, GetSize(cell->getPort("\\Y"))); + bool replace = false; + bool remove = false; + + if (!is_signed) + { /* unsigned */ + if (const_sig.is_fully_zero() && cmp_type == "$lt") { + condition = "unsigned X<0"; + replacement = "constant 0"; + replace_sig[0] = State::S0; + replace = true; + } + if (const_sig.is_fully_zero() && cmp_type == "$ge") { + condition = "unsigned X>=0"; + replacement = "constant 1"; + replace_sig[0] = State::S1; + replace = true; + } + if (const_width == var_width && const_sig.is_fully_ones() && cmp_type == "$gt") { + condition = "unsigned X>~0"; + replacement = "constant 0"; + replace_sig[0] = State::S0; + replace = true; + } + if (const_width == var_width && const_sig.is_fully_ones() && cmp_type == "$le") { + condition = "unsigned X<=~0"; + replacement = "constant 1"; + replace_sig[0] = State::S1; + replace = true; } - module->connect(cell->getPort("\\Y"), a_prime); - module->remove(cell); - did_something = true; - goto next_cell; - } - int const_bit_set = get_onehot_bit_index(sigConst); - if (const_bit_set >= 0 && const_bit_set < width) { - int bit_set = const_bit_set; - RTLIL::SigSpec a_prime(RTLIL::State::S0, width - bit_set); - for (int i = bit_set; i < width; i++) { - a_prime[i - bit_set] = sigVar[i]; + int const_bit_hot = get_onehot_bit_index(const_sig); + if (const_bit_hot >= 0 && const_bit_hot < var_width) + { + RTLIL::SigSpec 
var_high_sig(RTLIL::State::S0, var_width - const_bit_hot); + for (int i = const_bit_hot; i < var_width; i++) { + var_high_sig[i - const_bit_hot] = var_sig[i]; + } + + if (cmp_type == "$lt") + { + condition = stringf("unsigned X<%s", log_signal(const_sig)); + replacement = stringf("!X[%d:%d]", var_width - 1, const_bit_hot); + module->addLogicNot(NEW_ID, var_high_sig, cell->getPort("\\Y")); + remove = true; + } + if (cmp_type == "$ge") + { + condition = stringf("unsigned X>=%s", log_signal(const_sig)); + replacement = stringf("|X[%d:%d]", var_width - 1, const_bit_hot); + module->addReduceOr(NEW_ID, var_high_sig, cell->getPort("\\Y")); + remove = true; + } } - if (is_lt) { - log("Replacing %s cell `%s' (implementing unsigned X<%s) with !X[%d:%d]: %s.\n", - log_id(cell->type), log_id(cell), log_signal(sigConst), width - 1, bit_set, log_signal(a_prime)); - module->addLogicNot(NEW_ID, a_prime, cell->getPort("\\Y")); - } else { - log("Replacing %s cell `%s' (implementing unsigned X>=%s) with |X[%d:%d]: %s.\n", - log_id(cell->type), log_id(cell), log_signal(sigConst), width - 1, bit_set, log_signal(a_prime)); - module->addReduceOr(NEW_ID, a_prime, cell->getPort("\\Y")); + + int const_bit_set = get_highest_hot_index(const_sig); + if(const_bit_set >= var_width) + { + string cmp_name; + if (cmp_type == "$lt" || cmp_type == "$le") + { + if (cmp_type == "$lt") cmp_name = "<"; + if (cmp_type == "$le") cmp_name = "<="; + condition = stringf("unsigned X[%d:0]%s%s", var_width - 1, cmp_name.c_str(), log_signal(const_sig)); + replacement = "constant 1"; + replace_sig[0] = State::S1; + replace = true; + } + if (cmp_type == "$gt" || cmp_type == "$ge") + { + if (cmp_type == "$gt") cmp_name = ">"; + if (cmp_type == "$ge") cmp_name = ">="; + condition = stringf("unsigned X[%d:0]%s%s", var_width - 1, cmp_name.c_str(), log_signal(const_sig)); + replacement = "constant 0"; + replace_sig[0] = State::S0; + replace = true; + } } - module->remove(cell); - did_something = true; - goto next_cell; 
} - else if(const_bit_set >= width && const_bit_set >= 0){ - RTLIL::SigSpec a_prime(RTLIL::State::S0, 1); - if(is_lt){ - a_prime[0] = RTLIL::State::S1; - log("Replacing %s cell `%s' (implementing unsigned X[%d:0] < %s[%d:0]) with constant 0.\n", log_id(cell->type), log_id(cell), width-1, log_signal(sigConst),const_width-1); + else + { /* signed */ + if (const_sig.is_fully_zero() && cmp_type == "$lt") + { + condition = "signed X<0"; + replacement = stringf("X[%d]", var_width - 1); + replace_sig[0] = var_sig[var_width - 1]; + replace = true; } - else{ - log("Replacing %s cell `%s' (implementing unsigned X[%d:0]>= %s[%d:0]) with constant 1.\n", log_id(cell->type), log_id(cell), width-1, log_signal(sigConst),const_width-1); + if (const_sig.is_fully_zero() && cmp_type == "$ge") + { + condition = "signed X>=0"; + replacement = stringf("!X[%d]", var_width - 1); + /* X>=0 is the inverted sign bit. $logic_not gives a 0/1 result zero-extended to the width of Y; a bitwise $not would zero-extend its 1-bit input first and set the upper bits of a multi-bit Y. */ + module->addLogicNot(NEW_ID, var_sig[var_width - 1], cell->getPort("\\Y")); + remove = true; } - module->connect(cell->getPort("\\Y"), a_prime); + } + + if (replace || remove) + { + log("Replacing %s cell `%s' (implementing %s) with %s.\n", + log_id(cell->type), log_id(cell), condition.c_str(), replacement.c_str()); + if (replace) + module->connect(cell->getPort("\\Y"), replace_sig); module->remove(cell); did_something = true; goto next_cell; - } } } @@ -1477,7 +1514,7 @@ struct OptExprPass : public Pass { log(" opt_expr [options] [selection]\n"); log("\n"); log("This pass performs const folding on internal cell types with constant inputs.\n"); - log("It also performs some simple expression rewritring.\n"); + log("It also performs some simple expression rewriting.\n"); log("\n"); log(" -mux_undef\n"); log(" remove 'undef' inputs from $mux, $pmux and $_MUX_ cells\n"); diff --git a/passes/opt/opt_lut.cc b/passes/opt/opt_lut.cc index be050c713..26855fd70 100644 --- a/passes/opt/opt_lut.cc +++ b/passes/opt/opt_lut.cc @@ -36,7 +36,7 @@ struct OptLutWorker dict<RTLIL::Cell*, pool<RTLIL::Cell*>> luts_dlogics; 
dict<RTLIL::Cell*, pool<int>> luts_dlogic_inputs; - int combined_count = 0; + int eliminated_count = 0, combined_count = 0; bool evaluate_lut(RTLIL::Cell *lut, dict<SigBit, bool> inputs) { @@ -133,7 +133,7 @@ struct OptLutWorker // Second, make sure that the connection to dedicated logic is legal. If it is not legal, // it means one of the two things: // * The connection is spurious. I.e. this is dedicated logic that will be packed - // with some other LUT, and it just happens to be conected to this LUT as well. + // with some other LUT, and it just happens to be connected to this LUT as well. // * The connection is illegal. // In either of these cases, we don't need to concern ourselves with preserving the connection // between this LUT and this dedicated logic cell. @@ -188,7 +188,7 @@ struct OptLutWorker show_stats_by_arity(); log("\n"); - log("Combining LUTs.\n"); + log("Eliminating LUTs.\n"); pool<RTLIL::Cell*> worklist = luts; while (worklist.size()) { @@ -198,6 +198,106 @@ struct OptLutWorker break; } + auto lut = worklist.pop(); + SigSpec lut_input = sigmap(lut->getPort("\\A")); + pool<int> &lut_dlogic_inputs = luts_dlogic_inputs[lut]; + + vector<SigBit> lut_inputs; + for (auto &bit : lut_input) + { + if (bit.wire) + lut_inputs.push_back(sigmap(bit)); + } + + bool const0_match = true; + bool const1_match = true; + vector<bool> input_matches; + for (size_t i = 0; i < lut_inputs.size(); i++) + input_matches.push_back(true); + + for (int eval = 0; eval < 1 << lut_inputs.size(); eval++) + { + dict<SigBit, bool> eval_inputs; + for (size_t i = 0; i < lut_inputs.size(); i++) + eval_inputs[lut_inputs[i]] = (eval >> i) & 1; + bool value = evaluate_lut(lut, eval_inputs); + if (value != 0) + const0_match = false; + if (value != 1) + const1_match = false; + for (size_t i = 0; i < lut_inputs.size(); i++) + { + if (value != eval_inputs[lut_inputs[i]]) + input_matches[i] = false; + } + } + + int input_match = -1; + for (size_t i = 0; i < lut_inputs.size(); i++) + if 
(input_matches[i]) + input_match = i; + + if (const0_match || const1_match || input_match != -1) + { + log("Found redundant cell %s.%s.\n", log_id(module), log_id(lut)); + + SigBit value; + if (const0_match) + { + log(" Cell evaluates constant 0.\n"); + value = State::S0; + } + if (const1_match) + { + log(" Cell evaluates constant 1.\n"); + value = State::S1; + } + if (input_match != -1) { + log(" Cell evaluates signal %s.\n", log_signal(lut_inputs[input_match])); + value = lut_inputs[input_match]; + } + + if (lut_dlogic_inputs.size()) + { + log(" Not eliminating cell (connected to dedicated logic).\n"); + } + else + { + SigSpec lut_output = lut->getPort("\\Y"); + for (auto &port : index.query_ports(lut_output)) + { + if (port.cell != lut && luts.count(port.cell)) + worklist.insert(port.cell); + } + + module->connect(lut_output, value); + sigmap.add(lut_output, value); + + module->remove(lut); + luts.erase(lut); + luts_arity.erase(lut); + luts_dlogics.erase(lut); + luts_dlogic_inputs.erase(lut); + + eliminated_count++; + if (limit > 0) + limit--; + } + } + } + show_stats_by_arity(); + + log("\n"); + log("Combining LUTs.\n"); + worklist = luts; + while (worklist.size()) + { + if (limit == 0) + { + log("Limit reached.\n"); + break; + } + auto lutA = worklist.pop(); SigSpec lutA_input = sigmap(lutA->getPort("\\A")); SigSpec lutA_output = sigmap(lutA->getPort("\\Y")[0]); @@ -487,16 +587,20 @@ struct OptLutPass : public Pass { } extra_args(args, argidx, design); - int total_count = 0; + int eliminated_count = 0, combined_count = 0; for (auto module : design->selected_modules()) { - OptLutWorker worker(dlogic, module, limit - total_count); - total_count += worker.combined_count; + OptLutWorker worker(dlogic, module, limit - eliminated_count - combined_count); + eliminated_count += worker.eliminated_count; + combined_count += worker.combined_count; } - if (total_count) + if (eliminated_count) + design->scratchpad_set_bool("opt.did_something", true); + if (combined_count) 
design->scratchpad_set_bool("opt.did_something", true); log("\n"); - log("Combined %d LUTs.\n", total_count); + log("Eliminated %d LUTs.\n", eliminated_count); + log("Combined %d LUTs.\n", combined_count); } } OptLutPass; diff --git a/passes/opt/share.cc b/passes/opt/share.cc index b80280829..c85c27427 100644 --- a/passes/opt/share.cc +++ b/passes/opt/share.cc @@ -710,8 +710,12 @@ struct ShareWorker RTLIL::Cell *supercell = module->addCell(NEW_ID, c1); RTLIL::SigSpec addr1 = c1->getPort("\\ADDR"); RTLIL::SigSpec addr2 = c2->getPort("\\ADDR"); - if (addr1 != addr2) - supercell->setPort("\\ADDR", module->Mux(NEW_ID, addr2, addr1, act)); + if (GetSize(addr1) < GetSize(addr2)) + addr1.extend_u0(GetSize(addr2)); + else + addr2.extend_u0(GetSize(addr1)); + supercell->setPort("\\ADDR", addr1 != addr2 ? module->Mux(NEW_ID, addr2, addr1, act) : addr1); + supercell->parameters["\\ABITS"] = RTLIL::Const(GetSize(addr1)); supercell_aux.insert(module->addPos(NEW_ID, supercell->getPort("\\DATA"), c2->getPort("\\DATA"))); supercell_aux.insert(supercell); return supercell; diff --git a/passes/opt/wreduce.cc b/passes/opt/wreduce.cc index 0164f58d6..8063b86a6 100644 --- a/passes/opt/wreduce.cc +++ b/passes/opt/wreduce.cc @@ -235,8 +235,11 @@ struct WreduceWorker } else { while (GetSize(sig) > 0) { - auto info = mi.query(sig[GetSize(sig)-1]); + auto bit = sig[GetSize(sig)-1]; + if (keep_bits.count(bit)) + break; + auto info = mi.query(bit); if (info->is_output || GetSize(info->ports) > 1) break; diff --git a/passes/proc/proc_clean.cc b/passes/proc/proc_clean.cc index b9e43d1db..52141a8ec 100644 --- a/passes/proc/proc_clean.cc +++ b/passes/proc/proc_clean.cc @@ -77,18 +77,42 @@ void proc_clean_switch(RTLIL::SwitchRule *sw, RTLIL::CaseRule *parent, bool &did } else { - bool all_cases_are_empty = true; - for (auto cs : sw->cases) { - if (cs->actions.size() != 0 || cs->switches.size() != 0) - all_cases_are_empty = false; + bool all_fully_def = true; + for (auto cs : sw->cases) + { if 
(max_depth != 0) proc_clean_case(cs, did_something, count, max_depth-1); + int size = 0; + for (auto cmp : cs->compare) + { + size += cmp.size(); + if (!cmp.is_fully_def()) + all_fully_def = false; + } + if (sw->signal.size() != size) + all_fully_def = false; } - if (all_cases_are_empty) { - did_something = true; - for (auto cs : sw->cases) - delete cs; - sw->cases.clear(); + if (all_fully_def) + { + for (auto cs = sw->cases.begin(); cs != sw->cases.end();) + { + if ((*cs)->empty()) + { + did_something = true; + delete *cs; + cs = sw->cases.erase(cs); + } + else ++cs; + } + } + else + { + while (!sw->cases.empty() && sw->cases.back()->empty()) + { + did_something = true; + delete sw->cases.back(); + sw->cases.pop_back(); + } } } } @@ -106,7 +130,7 @@ void proc_clean_case(RTLIL::CaseRule *cs, bool &did_something, int &count, int m } for (size_t i = 0; i < cs->switches.size(); i++) { RTLIL::SwitchRule *sw = cs->switches[i]; - if (sw->cases.size() == 0) { + if (sw->empty()) { cs->switches.erase(cs->switches.begin() + (i--)); did_something = true; delete sw; diff --git a/passes/techmap/Makefile.inc b/passes/techmap/Makefile.inc index 4faa0ab00..cf9e198ad 100644 --- a/passes/techmap/Makefile.inc +++ b/passes/techmap/Makefile.inc @@ -36,6 +36,7 @@ OBJS += passes/techmap/attrmvcp.o OBJS += passes/techmap/attrmap.o OBJS += passes/techmap/zinit.o OBJS += passes/techmap/dff2dffs.o +OBJS += passes/techmap/flowmap.o endif GENFILES += passes/techmap/techmap.inc diff --git a/passes/techmap/dffinit.cc b/passes/techmap/dffinit.cc index a8eecc970..48390488e 100644 --- a/passes/techmap/dffinit.cc +++ b/passes/techmap/dffinit.cc @@ -43,18 +43,37 @@ struct DffinitPass : public Pass { log(" initial value of 1 or 0. (multi-bit values are not supported in this\n"); log(" mode.)\n"); log("\n"); + log(" -strinit <string for high> <string for low> \n"); + log(" use string values in the command line to represent a single-bit\n"); + log(" initial value of 1 or 0. 
(multi-bit values are not supported in this\n"); + log(" mode.)\n"); + log("\n"); + log(" -noreinit\n"); + log(" fail if the FF cell has already a defined initial value set in other\n"); + log(" passes and the initial value of the net it drives is not equal to\n"); + log(" the already defined initial value.\n"); + log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { log_header(design, "Executing DFFINIT pass (set INIT param on FF cells).\n"); dict<IdString, dict<IdString, IdString>> ff_types; - bool highlow_mode = false; + bool highlow_mode = false, noreinit = false; + std::string high_string, low_string; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { if (args[argidx] == "-highlow") { highlow_mode = true; + high_string = "high"; + low_string = "low"; + continue; + } + if (args[argidx] == "-strinit" && argidx+2 < args.size()) { + highlow_mode = true; + high_string = args[++argidx]; + low_string = args[++argidx]; continue; } if (args[argidx] == "-ff" && argidx+3 < args.size()) { @@ -64,6 +83,10 @@ struct DffinitPass : public Pass { ff_types[cell_name][output_port] = init_param; continue; } + if (args[argidx] == "-noreinit") { + noreinit = true; + continue; + } break; } extra_args(args, argidx, design); @@ -112,6 +135,10 @@ struct DffinitPass : public Pass { continue; while (GetSize(value.bits) <= i) value.bits.push_back(State::S0); + if (noreinit && value.bits[i] != State::Sx && value.bits[i] != init_bits.at(sig[i])) + log_error("Trying to assign a different init value for %s.%s.%s which technically " + "have a conflicted init value.\n", + log_id(module), log_id(cell), log_id(it.second)); value.bits[i] = init_bits.at(sig[i]); cleanup_bits.insert(sig[i]); } @@ -121,9 +148,9 @@ struct DffinitPass : public Pass { log_error("Multi-bit init value for %s.%s.%s is incompatible with -highlow mode.\n", log_id(module), log_id(cell), log_id(it.second)); if (value[0] == State::S1) - value = Const("high"); + value = 
Const(high_string); else - value = Const("low"); + value = Const(low_string); } log("Setting %s.%s.%s (port=%s, net=%s) to %s.\n", log_id(module), log_id(cell), log_id(it.second), diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc new file mode 100644 index 000000000..ddbd7bf5d --- /dev/null +++ b/passes/techmap/flowmap.cc @@ -0,0 +1,1613 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2018 whitequark <whitequark@whitequark.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +// [[CITE]] FlowMap algorithm +// Jason Cong; Yuzheng Ding, "An Optimal Technology Mapping Algorithm for Delay Optimization in Lookup-Table Based FPGA Designs," +// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, Vol. 13, pp. 1-12, Jan. 1994. +// doi: 10.1109/43.273754 + +// [[CITE]] FlowMap-r algorithm +// Jason Cong; Yuzheng Ding, "On Area/Depth Tradeoff in LUT-Based FPGA Technology Mapping," +// Very Large Scale Integration Systems, IEEE Transactions on, Vol. 2, June 1994. 
+// doi: 10.1109/92.285741 + +// Required reading material: +// +// Min-cut max-flow theorem: +// https://www.coursera.org/lecture/algorithms-part2/maxflow-mincut-theorem-beb9G +// FlowMap paper: +// http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf (short version) +// https://limsk.ece.gatech.edu/book/papers/flowmap.pdf (long version) +// FlowMap-r paper: +// http://cadlab.cs.ucla.edu/~cong/papers/dac93.pdf (short version) +// https://sci-hub.tw/10.1109/92.285741 (long version) + +// Notes on correspondence between paper and implementation: +// +// 1. In the FlowMap paper, the nodes are logic elements (analogous to Yosys cells) and edges are wires. However, in our implementation, +// we use an inverted approach: the nodes are Yosys wire bits, and the edges are derived from (but aren't represented by) Yosys cells. +// This may seem counterintuitive. Three observations may help understanding this. First, for a cell with a 1-bit Y output that is +// the sole driver of its output net (which is the typical case), these representations are equivalent, because there is an exact +// correspondence between cells and output wires. Second, in the paper, primary inputs (analogous to Yosys cell or module ports) are +// nodes, and in Yosys, inputs are wires; our approach allows a direct mapping from both primary inputs and 1-output logic elements to +// flow graph nodes. Third, Yosys cells may have multiple outputs or multi-bit outputs, and by using Yosys wire bits as flow graph nodes, +// such cells are supported without any additional effort; any Yosys cell with n output wire bits ends up being split into n flow graph +// nodes. +// +// 2. The FlowMap paper introduces three networks: Nt, Nt', and Nt''. The network Nt is directly represented by a subgraph of RTLIL graph, +// which is parsed into an equivalent but easier to traverse representation in FlowmapWorker. The network Nt' is built explicitly +// from a subgraph of Nt, and uses a similar representation in FlowGraph. 
The network Nt'' is implicit in FlowGraph, which is possible +// because of the following observation: each Nt' node corresponds to an Nt'' edge of capacity 1, and each Nt' edge corresponds to +// an Nt'' edge of capacity ∞. Therefore, we only need to explicitly record flow for Nt' edges and through Nt' nodes. +// +// 3. The FlowMap paper ambiguously states: "Moreover, we can find such a cut (X′′, X̅′′) by performing a depth first search starting at +// the source s, and including in X′′ all the nodes which are reachable from s." This actually refers to a specific kind of search, +// min-cut computation. Min-cut computation involves computing the set of nodes reachable from s by an undirected path with no full +// (i.e. zero capacity) forward edges or empty (i.e. no flow) backward edges. In addition, the depth first search is required to compute +// a max-volume max-flow min-cut specifically, because a max-flow min-cut is not, in general, unique. + +// Notes on implementation: +// +// 1. To compute depth optimal packing, an intermediate representation is used, where each cell with n output bits is split into n graph +// nodes. Each such graph node is represented directly with the wire bit (RTLIL::SigBit instance) that corresponds to the output bit +// it is created from. Fan-in and fan-out are represented explicitly by edge lists derived from the RTLIL graph. This IR never changes +// after it has been computed. +// +// In terms of data, this IR is comprised of `inputs`, `outputs`, `nodes`, `edges_fw` and `edges_bw` fields. +// +// We call this IR "gate IR". +// +// 2. To compute area optimal packing, another intermediate representation is used, which consists of some K-feasible cone for every node +// that exists in the gate IR. 
Immediately after depth optimal packing with FlowMap, each such cone occupies the lowest possible depth, +// but this is not true in general, and transformations of this IR may change the cones, although each transformation has to keep each +// cone K-feasible. In this IR, LUT fan-in and fan-out are represented explicitly by edge lists; if a K-feasible cone chosen for node A +// includes nodes B and C, there are edges between all predecessors of A, B and C in the gate IR and node A in this IR. Moreover, in +// this IR, cones may be *realized* or *derealized*. Only realized cones will end up mapped to actual LUTs in the output of this pass. +// +// Intuitively, this IR contains (some, ideally but not necessarily optimal) LUT representation for each input cell. By starting at outputs +// and traversing the graph of this IR backwards, each K-feasible cone is converted to an actual LUT at the end of the pass. This is +// the same as iterating through each realized LUT. +// +// The following are the invariants of this IR: +// a) Each gate IR node corresponds to a K-feasible cut. +// b) Each realized LUT is reachable through backward edges from some output. +// c) The LUT fan-in is exactly the fan-in of its constituent gates minus the fan-out of its constituent gates. +// The invariants are kept even for derealized LUTs, since the whole point of this IR is ease of packing, unpacking, and repacking LUTs. +// +// In terms of data, this IR is comprised of `lut_nodes` (the set of all realized LUTs), `lut_gates` (the map from a LUT to its +// constituent gates), `lut_edges_fw` and `lut_edges_bw` fields. The `inputs` and `outputs` fields are shared with the gate IR. +// +// We call this IR "LUT IR". 
+ +#include "kernel/yosys.h" +#include "kernel/sigtools.h" +#include "kernel/modtools.h" +#include "kernel/consteval.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +struct GraphStyle +{ + string label; + string color, fillcolor; + + GraphStyle(string label = "", string color = "black", string fillcolor = "") : + label(label), color(color), fillcolor(fillcolor) {} +}; + +static string dot_escape(string value) +{ + std::string escaped; + for (char c : value) { + if (c == '\n') + { + escaped += "\\n"; + continue; + } + if (c == '\\' || c == '"') + escaped += "\\"; + escaped += c; + } + return escaped; +} + +static void dump_dot_graph(string filename, + pool<RTLIL::SigBit> nodes, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges, + pool<RTLIL::SigBit> inputs, pool<RTLIL::SigBit> outputs, + std::function<GraphStyle(RTLIL::SigBit)> node_style = + [](RTLIL::SigBit) { return GraphStyle{}; }, + std::function<GraphStyle(RTLIL::SigBit, RTLIL::SigBit)> edge_style = + [](RTLIL::SigBit, RTLIL::SigBit) { return GraphStyle{}; }, + string name = "") +{ + FILE *f = fopen(filename.c_str(), "w"); + fprintf(f, "digraph \"%s\" {\n", name.c_str()); + fprintf(f, " rankdir=\"TB\";\n"); + + dict<RTLIL::SigBit, int> ids; + for (auto node : nodes) + { + ids[node] = ids.size(); + + string shape = "ellipse"; + if (inputs[node]) + shape = "box"; + if (outputs[node]) + shape = "octagon"; + auto prop = node_style(node); + string style = ""; + if (!prop.fillcolor.empty()) + style = "filled"; + fprintf(f, " n%d [ shape=%s, fontname=\"Monospace\", label=\"%s\", color=\"%s\", fillcolor=\"%s\", style=\"%s\" ];\n", + ids[node], shape.c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str(), style.c_str()); + } + + fprintf(f, " { rank=\"source\"; "); + for (auto input : inputs) + if (nodes[input]) + fprintf(f, "n%d; ", ids[input]); + fprintf(f, "}\n"); + + fprintf(f, " { rank=\"sink\"; "); + for (auto output : outputs) + if (nodes[output]) + fprintf(f, "n%d; ", 
ids[output]); + fprintf(f, "}\n"); + + for (auto edge : edges) + { + auto source = edge.first; + for (auto sink : edge.second) { + if (nodes[source] && nodes[sink]) + { + auto prop = edge_style(source, sink); + fprintf(f, " n%d -> n%d [ label=\"%s\", color=\"%s\", fillcolor=\"%s\" ];\n", + ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str()); + } + } + } + + fprintf(f, "}\n"); + fclose(f); +} + +struct FlowGraph +{ + const RTLIL::SigBit source; + RTLIL::SigBit sink; + pool<RTLIL::SigBit> nodes = {source}; + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw; + + const int MAX_NODE_FLOW = 1; + dict<RTLIL::SigBit, int> node_flow; + dict<pair<RTLIL::SigBit, RTLIL::SigBit>, int> edge_flow; + + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed; + + void dump_dot_graph(string filename) + { + auto node_style = [&](RTLIL::SigBit node) { + string label = (node == source) ? "(source)" : log_signal(node); + for (auto collapsed_node : collapsed[node]) + label += stringf(" %s", log_signal(collapsed_node)); + int flow = node_flow[node]; + if (node != source && node != sink) + label += stringf("\n%d/%d", flow, MAX_NODE_FLOW); + else + label += stringf("\n%d/∞", flow); + return GraphStyle{label, flow < MAX_NODE_FLOW ? "green" : "black"}; + }; + auto edge_style = [&](RTLIL::SigBit source, RTLIL::SigBit sink) { + int flow = edge_flow[{source, sink}]; + return GraphStyle{stringf("%d/∞", flow), flow > 0 ? "blue" : "black"}; + }; + ::dump_dot_graph(filename, nodes, edges_fw, {source}, {sink}, node_style, edge_style); + } + + // Here, we are working on the Nt'' network, but our representation is the Nt' network. + // The difference between these is that where in Nt' we have a subgraph: + // + // v1 -> v2 -> v3 + // + // in Nt'' we have a corresponding subgraph: + // + // v'1b -∞-> v'2t -f-> v'2b -∞-> v'3t + // + // To address this, we split each node v into two nodes, v't and v'b. 
This representation is virtual, + // in the sense that nodes v't and v'b are overlaid on top of the original node v, and only exist + // in paths and worklists. + + struct NodePrime + { + RTLIL::SigBit node; + bool is_bottom; + + NodePrime(RTLIL::SigBit node, bool is_bottom) : + node(node), is_bottom(is_bottom) {} + + bool operator==(const NodePrime &other) const + { + return node == other.node && is_bottom == other.is_bottom; + } + bool operator!=(const NodePrime &other) const + { + return !(*this == other); + } + unsigned int hash() const + { + return hash_ops<pair<RTLIL::SigBit, int>>::hash({node, is_bottom}); + } + + static NodePrime top(RTLIL::SigBit node) + { + return NodePrime(node, /*is_bottom=*/false); + } + + static NodePrime bottom(RTLIL::SigBit node) + { + return NodePrime(node, /*is_bottom=*/true); + } + + NodePrime as_top() const + { + log_assert(is_bottom); + return top(node); + } + + NodePrime as_bottom() const + { + log_assert(!is_bottom); + return bottom(node); + } + }; + + bool find_augmenting_path(bool commit) + { + NodePrime source_prime = {source, true}; + NodePrime sink_prime = {sink, false}; + vector<NodePrime> path = {source_prime}; + pool<NodePrime> visited = {}; + bool found; + do { + found = false; + + auto node_prime = path.back(); + visited.insert(node_prime); + + if (!node_prime.is_bottom) // vt + { + if (!visited[node_prime.as_bottom()] && node_flow[node_prime.node] < MAX_NODE_FLOW) + { + path.push_back(node_prime.as_bottom()); + found = true; + } + else + { + for (auto node_pred : edges_bw[node_prime.node]) + { + if (!visited[NodePrime::bottom(node_pred)] && edge_flow[{node_pred, node_prime.node}] > 0) + { + path.push_back(NodePrime::bottom(node_pred)); + found = true; + break; + } + } + } + } + else // vb + { + if (!visited[node_prime.as_top()] && node_flow[node_prime.node] > 0) + { + path.push_back(node_prime.as_top()); + found = true; + } + else + { + for (auto node_succ : edges_fw[node_prime.node]) + { + if 
(!visited[NodePrime::top(node_succ)] /* && edge_flow[...] < ∞ */) + { + path.push_back(NodePrime::top(node_succ)); + found = true; + break; + } + } + } + } + + if (!found && path.size() > 1) + { + path.pop_back(); + found = true; + } + } while(path.back() != sink_prime && found); + + if (commit && path.back() == sink_prime) + { + auto prev_prime = path.front(); + for (auto node_prime : path) + { + if (node_prime == source_prime) + continue; + + log_assert(prev_prime.is_bottom ^ node_prime.is_bottom); + if (prev_prime.node == node_prime.node) + { + auto node = node_prime.node; + if (!prev_prime.is_bottom && node_prime.is_bottom) + { + log_assert(node_flow[node] == 0); + node_flow[node]++; + } + else + { + log_assert(node_flow[node] != 0); + node_flow[node]--; + } + } + else + { + if (prev_prime.is_bottom && !node_prime.is_bottom) + { + log_assert(true /* edge_flow[...] < ∞ */); + edge_flow[{prev_prime.node, node_prime.node}]++; + } + else + { + log_assert((edge_flow[{node_prime.node, prev_prime.node}] > 0)); + edge_flow[{node_prime.node, prev_prime.node}]--; + } + } + prev_prime = node_prime; + } + + node_flow[source]++; + node_flow[sink]++; + } + return path.back() == sink_prime; + } + + int maximum_flow(int order) + { + int flow = 0; + while (flow < order && find_augmenting_path(/*commit=*/true)) + flow++; + return flow + find_augmenting_path(/*commit=*/false); + } + + pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> edge_cut() + { + pool<RTLIL::SigBit> x, xi; + + NodePrime source_prime = {source, true}; + NodePrime sink_prime = {sink, false}; + pool<NodePrime> visited; + vector<NodePrime> worklist = {source_prime}; + while (!worklist.empty()) + { + auto node_prime = worklist.back(); + worklist.pop_back(); + if (visited[node_prime]) + continue; + visited.insert(node_prime); + + if (!node_prime.is_bottom) + x.insert(node_prime.node); + + // Mincut is constructed by traversing a graph in an undirected way along forward edges that aren't full, or backward edges + // 
that aren't empty. + if (!node_prime.is_bottom) // top + { + if (node_flow[node_prime.node] < MAX_NODE_FLOW) + worklist.push_back(node_prime.as_bottom()); + for (auto node_pred : edges_bw[node_prime.node]) + if (edge_flow[{node_pred, node_prime.node}] > 0) + worklist.push_back(NodePrime::bottom(node_pred)); + } + else // bottom + { + if (node_flow[node_prime.node] > 0) + worklist.push_back(node_prime.as_top()); + for (auto node_succ : edges_fw[node_prime.node]) + if (true /* edge_flow[...] < ∞ */) + worklist.push_back(NodePrime::top(node_succ)); + } + } + + for (auto node : nodes) + if (!x[node]) + xi.insert(node); + + for (auto collapsed_node : collapsed[sink]) + xi.insert(collapsed_node); + + log_assert(!x[sink] && xi[sink]); + return {x, xi}; + } +}; + +struct FlowmapWorker +{ + int order; + int r_alpha, r_beta, r_gamma; + bool debug, debug_relax; + + RTLIL::Module *module; + SigMap sigmap; + ModIndex index; + + dict<RTLIL::SigBit, ModIndex::PortInfo> node_origins; + + // Gate IR + pool<RTLIL::SigBit> nodes, inputs, outputs; + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw; + dict<RTLIL::SigBit, int> labels; + + // LUT IR + pool<RTLIL::SigBit> lut_nodes; + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_gates; + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_edges_fw, lut_edges_bw; + dict<RTLIL::SigBit, int> lut_depths, lut_altitudes, lut_slacks; + + int gate_count = 0, lut_count = 0, packed_count = 0; + int gate_area = 0, lut_area = 0; + + enum class GraphMode { + Label, + Cut, + Slack, + }; + + void dump_dot_graph(string filename, GraphMode mode, + pool<RTLIL::SigBit> subgraph_nodes = {}, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> subgraph_edges = {}, + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed = {}, + pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut = {}) + { + if (subgraph_nodes.empty()) + subgraph_nodes = nodes; + if (subgraph_edges.empty()) + subgraph_edges = edges_fw; + + auto node_style = [&](RTLIL::SigBit node) { + string label = 
log_signal(node); + for (auto collapsed_node : collapsed[node]) + if (collapsed_node != node) + label += stringf(" %s", log_signal(collapsed_node)); + switch (mode) + { + case GraphMode::Label: + if (labels[node] == -1) + { + label += "\nl=?"; + return GraphStyle{label}; + } + else + { + label += stringf("\nl=%d", labels[node]); + string fillcolor = stringf("/set311/%d", 1 + labels[node] % 11); + return GraphStyle{label, "", fillcolor}; + } + + case GraphMode::Cut: + if (cut.first[node]) + return GraphStyle{label, "blue"}; + if (cut.second[node]) + return GraphStyle{label, "red"}; + return GraphStyle{label}; + + case GraphMode::Slack: + label += stringf("\nd=%d a=%d\ns=%d", lut_depths[node], lut_altitudes[node], lut_slacks[node]); + return GraphStyle{label, lut_slacks[node] == 0 ? "red" : "black"}; + } + return GraphStyle{label}; + }; + auto edge_style = [&](RTLIL::SigBit, RTLIL::SigBit) { + return GraphStyle{}; + }; + ::dump_dot_graph(filename, subgraph_nodes, subgraph_edges, inputs, outputs, node_style, edge_style, module->name.str()); + } + + void dump_dot_lut_graph(string filename, GraphMode mode) + { + pool<RTLIL::SigBit> lut_and_input_nodes; + lut_and_input_nodes.insert(lut_nodes.begin(), lut_nodes.end()); + lut_and_input_nodes.insert(inputs.begin(), inputs.end()); + dump_dot_graph(filename, mode, lut_and_input_nodes, lut_edges_fw, lut_gates); + } + + pool<RTLIL::SigBit> find_subgraph(RTLIL::SigBit sink) + { + pool<RTLIL::SigBit> subgraph; + pool<RTLIL::SigBit> worklist = {sink}; + while (!worklist.empty()) + { + auto node = worklist.pop(); + subgraph.insert(node); + for (auto source : edges_bw[node]) + { + if (!subgraph[source]) + worklist.insert(source); + } + } + return subgraph; + } + + FlowGraph build_flow_graph(RTLIL::SigBit sink, int p) + { + FlowGraph flow_graph; + flow_graph.sink = sink; + + pool<RTLIL::SigBit> worklist = {sink}, visited; + while (!worklist.empty()) + { + auto node = worklist.pop(); + visited.insert(node); + + auto collapsed_node = 
labels[node] == p ? sink : node; + if (node != collapsed_node) + flow_graph.collapsed[collapsed_node].insert(node); + flow_graph.nodes.insert(collapsed_node); + + for (auto node_pred : edges_bw[node]) + { + auto collapsed_node_pred = labels[node_pred] == p ? sink : node_pred; + if (node_pred != collapsed_node_pred) + flow_graph.collapsed[collapsed_node_pred].insert(node_pred); + if (collapsed_node != collapsed_node_pred) + { + flow_graph.edges_bw[collapsed_node].insert(collapsed_node_pred); + flow_graph.edges_fw[collapsed_node_pred].insert(collapsed_node); + } + if (inputs[node_pred]) + { + flow_graph.edges_bw[collapsed_node_pred].insert(flow_graph.source); + flow_graph.edges_fw[flow_graph.source].insert(collapsed_node_pred); + } + + if (!visited[node_pred]) + worklist.insert(node_pred); + } + } + return flow_graph; + } + + void discover_nodes(pool<IdString> cell_types) + { + for (auto cell : module->selected_cells()) + { + if (!cell_types[cell->type]) + continue; + + if (!cell->known()) + log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell)); + + pool<RTLIL::SigBit> fanout; + for (auto conn : cell->connections()) + { + if (!cell->output(conn.first)) continue; + int offset = -1; + for (auto bit : conn.second) + { + offset++; + if (!bit.wire) continue; + auto mapped_bit = sigmap(bit); + if (nodes[mapped_bit]) + log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit)); + nodes.insert(mapped_bit); + node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset); + fanout.insert(mapped_bit); + } + } + + int fanin = 0; + for (auto conn : cell->connections()) + { + if (!cell->input(conn.first)) continue; + for (auto bit : sigmap(conn.second)) + { + if (!bit.wire) continue; + for (auto fanout_bit : fanout) + { + edges_fw[bit].insert(fanout_bit); + edges_bw[fanout_bit].insert(bit); + } + fanin++; + } + } + + if (fanin > order) + log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n", + 
cell->type.c_str(), log_id(module), log_id(cell), fanin, order); + + gate_count++; + gate_area += 1 << fanin; + } + + for (auto edge : edges_fw) + { + if (!nodes[edge.first]) + { + inputs.insert(edge.first); + nodes.insert(edge.first); + } + } + + for (auto node : nodes) + { + auto node_info = index.query(node); + if (node_info->is_output && !inputs[node]) + outputs.insert(node); + for (auto port : node_info->ports) + if (!cell_types[port.cell->type] && !inputs[node]) + outputs.insert(node); + } + + if (debug) + { + dump_dot_graph("flowmap-initial.dot", GraphMode::Label); + log("Dumped initial graph to `flowmap-initial.dot`.\n"); + } + } + + void label_nodes() + { + for (auto node : nodes) + labels[node] = -1; + for (auto input : inputs) + { + if (input.wire->attributes.count("\\$flowmap_level")) + labels[input] = input.wire->attributes["\\$flowmap_level"].as_int(); + else + labels[input] = 0; + } + + pool<RTLIL::SigBit> worklist = nodes; + int debug_num = 0; + while (!worklist.empty()) + { + auto sink = worklist.pop(); + if (labels[sink] != -1) + continue; + + bool inputs_have_labels = true; + for (auto sink_input : edges_bw[sink]) + { + if (labels[sink_input] == -1) + { + inputs_have_labels = false; + break; + } + } + if (!inputs_have_labels) + continue; + + if (debug) + { + debug_num++; + log("Examining subgraph %d rooted in %s.\n", debug_num, log_signal(sink)); + } + + pool<RTLIL::SigBit> subgraph = find_subgraph(sink); + + int p = 1; + for (auto subgraph_node : subgraph) + p = max(p, labels[subgraph_node]); + + FlowGraph flow_graph = build_flow_graph(sink, p); + int flow = flow_graph.maximum_flow(order); + pool<RTLIL::SigBit> x, xi; + if (flow <= order) + { + labels[sink] = p; + auto cut = flow_graph.edge_cut(); + x = cut.first; + xi = cut.second; + } + else + { + labels[sink] = p + 1; + x = subgraph; + x.erase(sink); + xi.insert(sink); + } + lut_gates[sink] = xi; + + pool<RTLIL::SigBit> k; + for (auto xi_node : xi) + { + for (auto xi_node_pred : 
edges_bw[xi_node]) + if (x[xi_node_pred]) + k.insert(xi_node_pred); + } + log_assert((int)k.size() <= order); + lut_edges_bw[sink] = k; + for (auto k_node : k) + lut_edges_fw[k_node].insert(sink); + + if (debug) + { + log(" Maximum flow: %d. Assigned label %d.\n", flow, labels[sink]); + dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), GraphMode::Cut, subgraph, {}, {}, {x, xi}); + log(" Dumped subgraph to `flowmap-%d-sub.dot`.\n", debug_num); + flow_graph.dump_dot_graph(stringf("flowmap-%d-flow.dot", debug_num)); + log(" Dumped flow graph to `flowmap-%d-flow.dot`.\n", debug_num); + log(" LUT inputs:"); + for (auto k_node : k) + log(" %s", log_signal(k_node)); + log(".\n"); + log(" LUT packed gates:"); + for (auto xi_node : xi) + log(" %s", log_signal(xi_node)); + log(".\n"); + } + + for (auto sink_succ : edges_fw[sink]) + worklist.insert(sink_succ); + } + + if (debug) + { + dump_dot_graph("flowmap-labeled.dot", GraphMode::Label); + log("Dumped labeled graph to `flowmap-labeled.dot`.\n"); + } + } + + int map_luts() + { + pool<RTLIL::SigBit> worklist = outputs; + while (!worklist.empty()) + { + auto lut_node = worklist.pop(); + lut_nodes.insert(lut_node); + for (auto input_node : lut_edges_bw[lut_node]) + if (!lut_nodes[input_node] && !inputs[input_node]) + worklist.insert(input_node); + } + + int depth = 0; + for (auto label : labels) + depth = max(depth, label.second); + log("Mapped to %zu LUTs with maximum depth %d.\n", lut_nodes.size(), depth); + + if (debug) + { + dump_dot_lut_graph("flowmap-mapped.dot", GraphMode::Label); + log("Dumped mapped graph to `flowmap-mapped.dot`.\n"); + } + + return depth; + } + + void realize_derealize_lut(RTLIL::SigBit lut, pool<RTLIL::SigBit> *changed = nullptr) + { + pool<RTLIL::SigBit> worklist = {lut}; + while (!worklist.empty()) + { + auto lut = worklist.pop(); + if (inputs[lut]) + continue; + + bool realized_successors = false; + for (auto lut_succ : lut_edges_fw[lut]) + if (lut_nodes[lut_succ]) + realized_successors = 
true; + + if (realized_successors && !lut_nodes[lut]) + lut_nodes.insert(lut); + else if (!realized_successors && lut_nodes[lut]) + lut_nodes.erase(lut); + else + continue; + + for (auto lut_pred : lut_edges_bw[lut]) + worklist.insert(lut_pred); + + if (changed) + changed->insert(lut); + } + } + + void add_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr) + { + log_assert(!lut_edges_fw[pred][succ] && !lut_edges_bw[succ][pred]); + log_assert((int)lut_edges_bw[succ].size() < order); + + lut_edges_fw[pred].insert(succ); + lut_edges_bw[succ].insert(pred); + realize_derealize_lut(pred, changed); + + if (changed) + { + changed->insert(pred); + changed->insert(succ); + } + } + + void remove_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr) + { + log_assert(lut_edges_fw[pred][succ] && lut_edges_bw[succ][pred]); + + lut_edges_fw[pred].erase(succ); + lut_edges_bw[succ].erase(pred); + realize_derealize_lut(pred, changed); + + if (changed) + { + if (lut_nodes[pred]) + changed->insert(pred); + changed->insert(succ); + } + } + + pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut_lut_at_gate(RTLIL::SigBit lut, RTLIL::SigBit lut_gate) + { + pool<RTLIL::SigBit> gate_inputs = lut_edges_bw[lut]; + pool<RTLIL::SigBit> other_inputs; + pool<RTLIL::SigBit> worklist = {lut}; + while (!worklist.empty()) + { + auto node = worklist.pop(); + for (auto node_pred : edges_bw[node]) + { + if (node_pred == lut_gate) + continue; + if (lut_gates[lut][node_pred]) + worklist.insert(node_pred); + else + { + gate_inputs.erase(node_pred); + other_inputs.insert(node_pred); + } + } + } + return {gate_inputs, other_inputs}; + } + + void compute_lut_distances(dict<RTLIL::SigBit, int> &lut_distances, bool forward, + pool<RTLIL::SigBit> initial = {}, pool<RTLIL::SigBit> *changed = nullptr) + { + pool<RTLIL::SigBit> terminals = forward ? inputs : outputs; + auto &lut_edges_next = forward ? 
lut_edges_fw : lut_edges_bw; + auto &lut_edges_prev = forward ? lut_edges_bw : lut_edges_fw; + + if (initial.empty()) + initial = terminals; + for (auto node : initial) + lut_distances.erase(node); + + pool<RTLIL::SigBit> worklist = initial; + while (!worklist.empty()) + { + auto lut = worklist.pop(); + int lut_distance = 0; + if (forward && inputs[lut]) + lut_distance = labels[lut]; // to support (* $flowmap_level=n *) + for (auto lut_prev : lut_edges_prev[lut]) + if ((lut_nodes[lut_prev] || inputs[lut_prev]) && lut_distances.count(lut_prev)) + lut_distance = max(lut_distance, lut_distances[lut_prev] + 1); + if (!lut_distances.count(lut) || lut_distances[lut] != lut_distance) + { + lut_distances[lut] = lut_distance; + if (changed != nullptr && !inputs[lut]) + changed->insert(lut); + for (auto lut_next : lut_edges_next[lut]) + if (lut_nodes[lut_next] || inputs[lut_next]) + worklist.insert(lut_next); + } + } + } + + void check_lut_distances(const dict<RTLIL::SigBit, int> &lut_distances, bool forward) + { + dict<RTLIL::SigBit, int> gold_lut_distances; + compute_lut_distances(gold_lut_distances, forward); + for (auto lut_distance : lut_distances) + if (lut_nodes[lut_distance.first]) + log_assert(lut_distance.second == gold_lut_distances[lut_distance.first]); + } + + // LUT depth is the length of the longest path from any input in LUT fan-in to LUT. + // LUT altitude (for lack of a better term) is the length of the longest path from LUT to any output in LUT fan-out. 
+ void update_lut_depths_altitudes(pool<RTLIL::SigBit> worklist = {}, pool<RTLIL::SigBit> *changed = nullptr) + { + compute_lut_distances(lut_depths, /*forward=*/true, worklist, changed); + compute_lut_distances(lut_altitudes, /*forward=*/false, worklist, changed); + if (debug_relax && !worklist.empty()) { + check_lut_distances(lut_depths, /*forward=*/true); + check_lut_distances(lut_altitudes, /*forward=*/false); + } + } + + // LUT critical output set is the set of outputs whose depth will increase (equivalently, slack will decrease) if the depth of + // the LUT increases. (This is referred to as RPOv for LUTv in the paper.) + void compute_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs, + pool<RTLIL::SigBit> worklist = {}) + { + if (worklist.empty()) + worklist = lut_nodes; + + while (!worklist.empty()) + { + bool updated_some = false; + for (auto lut : worklist) + { + if (outputs[lut]) + lut_critical_outputs[lut] = {lut}; + else + { + bool all_succ_computed = true; + lut_critical_outputs[lut] = {}; + for (auto lut_succ : lut_edges_fw[lut]) + { + if (lut_nodes[lut_succ] && lut_depths[lut_succ] == lut_depths[lut] + 1) + { + if (lut_critical_outputs.count(lut_succ)) + lut_critical_outputs[lut].insert(lut_critical_outputs[lut_succ].begin(), lut_critical_outputs[lut_succ].end()); + else + { + all_succ_computed = false; + break; + } + } + } + if (!all_succ_computed) + { + lut_critical_outputs.erase(lut); + continue; + } + } + worklist.erase(lut); + updated_some = true; + } + log_assert(updated_some); + } + } + + // Invalidating LUT critical output sets is tricky, because increasing the depth of a LUT may take other, adjacent LUTs off the critical + // path to the output. Conservatively, if we increase depth of some LUT, every LUT in its input cone needs to have its critical output + // set invalidated, too. 
+ pool<RTLIL::SigBit> invalidate_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs, + pool<RTLIL::SigBit> worklist) + { + pool<RTLIL::SigBit> changed; + while (!worklist.empty()) + { + auto lut = worklist.pop(); + changed.insert(lut); + lut_critical_outputs.erase(lut); + for (auto lut_pred : lut_edges_bw[lut]) + { + if (lut_nodes[lut_pred] && !changed[lut_pred]) + { + changed.insert(lut_pred); + worklist.insert(lut_pred); + } + } + } + return changed; + } + + void check_lut_critical_outputs(const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs) + { + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> gold_lut_critical_outputs; + compute_lut_critical_outputs(gold_lut_critical_outputs); + for (auto lut_critical_output : lut_critical_outputs) + if (lut_nodes[lut_critical_output.first]) + log_assert(lut_critical_output.second == gold_lut_critical_outputs[lut_critical_output.first]); + } + + void update_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs, + pool<RTLIL::SigBit> worklist = {}) + { + if (!worklist.empty()) + { + pool<RTLIL::SigBit> invalidated = invalidate_lut_critical_outputs(lut_critical_outputs, worklist); + compute_lut_critical_outputs(lut_critical_outputs, invalidated); + check_lut_critical_outputs(lut_critical_outputs); + } + else + compute_lut_critical_outputs(lut_critical_outputs); + } + + void update_breaking_node_potentials(dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> &potentials, + const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs) + { + for (auto lut : lut_nodes) + { + if (potentials.count(lut)) + continue; + if (lut_gates[lut].size() == 1 || lut_slacks[lut] == 0) + continue; + + if (debug_relax) + log(" Computing potentials for LUT %s.\n", log_signal(lut)); + + for (auto lut_gate : lut_gates[lut]) + { + if (lut == lut_gate) + continue; + + if (debug_relax) + log(" Considering breaking node %s.\n", log_signal(lut_gate)); + + int r_ex, r_im, r_slk; + + 
auto cut_inputs = cut_lut_at_gate(lut, lut_gate); + pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second; + if (gate_inputs.empty() && (int)other_inputs.size() == order) + { + if (debug_relax) + log(" Breaking would result in a (k+1)-LUT.\n"); + continue; + } + + pool<RTLIL::SigBit> elim_fanin_luts; + for (auto gate_input : gate_inputs) + { + if (lut_edges_fw[gate_input].size() == 1) + { + log_assert(lut_edges_fw[gate_input][lut]); + elim_fanin_luts.insert(gate_input); + } + } + if (debug_relax) + { + if (!lut_nodes[lut_gate]) + log(" Breaking requires a new LUT.\n"); + if (!gate_inputs.empty()) + { + log(" Breaking eliminates LUT inputs"); + for (auto gate_input : gate_inputs) + log(" %s", log_signal(gate_input)); + log(".\n"); + } + if (!elim_fanin_luts.empty()) + { + log(" Breaking eliminates fan-in LUTs"); + for (auto elim_fanin_lut : elim_fanin_luts) + log(" %s", log_signal(elim_fanin_lut)); + log(".\n"); + } + } + r_ex = (lut_nodes[lut_gate] ? 0 : -1) + elim_fanin_luts.size(); + + pool<pair<RTLIL::SigBit, RTLIL::SigBit>> maybe_mergeable_luts; + + // Try to merge LUTv with one of its successors. + RTLIL::SigBit last_lut_succ; + int fanout = 0; + for (auto lut_succ : lut_edges_fw[lut]) + { + if (lut_nodes[lut_succ]) + { + fanout++; + last_lut_succ = lut_succ; + } + } + if (fanout == 1) + maybe_mergeable_luts.insert({lut, last_lut_succ}); + + // Try to merge LUTv with one of its predecessors. + for (auto lut_pred : other_inputs) + { + int fanout = 0; + for (auto lut_pred_succ : lut_edges_fw[lut_pred]) + if (lut_nodes[lut_pred_succ] || lut_pred_succ == lut_gate) + fanout++; + if (fanout == 1) + maybe_mergeable_luts.insert({lut_pred, lut}); + } + + // Try to merge LUTw with one of its predecessors. 
+ for (auto lut_gate_pred : lut_edges_bw[lut_gate]) + { + int fanout = 0; + for (auto lut_gate_pred_succ : lut_edges_fw[lut_gate_pred]) + if (lut_nodes[lut_gate_pred_succ] || lut_gate_pred_succ == lut_gate) + fanout++; + if (fanout == 1) + maybe_mergeable_luts.insert({lut_gate_pred, lut_gate}); + } + + r_im = 0; + for (auto maybe_mergeable_pair : maybe_mergeable_luts) + { + log_assert(lut_edges_fw[maybe_mergeable_pair.first][maybe_mergeable_pair.second]); + pool<RTLIL::SigBit> unique_inputs; + for (auto fst_lut_pred : lut_edges_bw[maybe_mergeable_pair.first]) + if (lut_nodes[fst_lut_pred]) + unique_inputs.insert(fst_lut_pred); + for (auto snd_lut_pred : lut_edges_bw[maybe_mergeable_pair.second]) + if (lut_nodes[snd_lut_pred]) + unique_inputs.insert(snd_lut_pred); + unique_inputs.erase(maybe_mergeable_pair.first); + if ((int)unique_inputs.size() <= order) + { + if (debug_relax) + log(" Breaking may allow merging %s and %s.\n", + log_signal(maybe_mergeable_pair.first), log_signal(maybe_mergeable_pair.second)); + r_im++; + } + } + + int lut_gate_depth; + if (lut_nodes[lut_gate]) + lut_gate_depth = lut_depths[lut_gate]; + else + { + lut_gate_depth = 0; + for (auto lut_gate_pred : lut_edges_bw[lut_gate]) + lut_gate_depth = max(lut_gate_depth, lut_depths[lut_gate_pred] + 1); + } + if (lut_depths[lut] >= lut_gate_depth + 1) + r_slk = 0; + else + { + int depth_delta = lut_gate_depth + 1 - lut_depths[lut]; + if (depth_delta > lut_slacks[lut]) + { + if (debug_relax) + log(" Breaking would increase depth by %d, which is more than available slack.\n", depth_delta); + continue; + } + + if (debug_relax) + { + log(" Breaking increases depth of LUT by %d.\n", depth_delta); + if (lut_critical_outputs.at(lut).size()) + { + log(" Breaking decreases slack of outputs"); + for (auto lut_critical_output : lut_critical_outputs.at(lut)) + { + log(" %s", log_signal(lut_critical_output)); + log_assert(lut_slacks[lut_critical_output] > 0); + } + log(".\n"); + } + } + r_slk = 
lut_critical_outputs.at(lut).size() * depth_delta; + } + + int p = 100 * (r_alpha * r_ex + r_beta * r_im + r_gamma) / (r_slk + 1); + if (debug_relax) + log(" Potential for breaking node %s: %d (Rex=%d, Rim=%d, Rslk=%d).\n", + log_signal(lut_gate), p, r_ex, r_im, r_slk); + potentials[lut][lut_gate] = p; + } + } + } + + bool relax_depth_for_bound(bool first, int depth_bound, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs) + { + size_t initial_count = lut_nodes.size(); + + for (auto node : lut_nodes) + { + lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]); + log_assert(lut_slacks[node] >= 0); + } + if (debug) + { + dump_dot_lut_graph(stringf("flowmap-relax-%d-initial.dot", depth_bound), GraphMode::Slack); + log(" Dumped initial slack graph to `flowmap-relax-%d-initial.dot`.\n", depth_bound); + } + + dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> potentials; + for (int break_num = 1; ; break_num++) + { + update_breaking_node_potentials(potentials, lut_critical_outputs); + + if (potentials.empty()) + { + log(" Relaxed to %zu (+%zu) LUTs.\n", lut_nodes.size(), lut_nodes.size() - initial_count); + if (!first && break_num == 1) + { + log(" Design fully relaxed.\n"); + return true; + } + else + { + log(" Slack exhausted.\n"); + break; + } + } + + RTLIL::SigBit breaking_lut, breaking_gate; + int best_potential = INT_MIN; + for (auto lut_gate_potentials : potentials) + { + for (auto gate_potential : lut_gate_potentials.second) + { + if (gate_potential.second > best_potential) + { + breaking_lut = lut_gate_potentials.first; + breaking_gate = gate_potential.first; + best_potential = gate_potential.second; + } + } + } + log(" Breaking LUT %s to %s LUT %s (potential %d).\n", + log_signal(breaking_lut), lut_nodes[breaking_gate] ? 
"reuse" : "extract", log_signal(breaking_gate), best_potential); + + if (debug_relax) + log(" Removing breaking gate %s from LUT.\n", log_signal(breaking_gate)); + lut_gates[breaking_lut].erase(breaking_gate); + + auto cut_inputs = cut_lut_at_gate(breaking_lut, breaking_gate); + pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second; + + pool<RTLIL::SigBit> worklist = lut_gates[breaking_lut]; + pool<RTLIL::SigBit> elim_gates = gate_inputs; + while (!worklist.empty()) + { + auto lut_gate = worklist.pop(); + bool all_gate_preds_elim = true; + for (auto lut_gate_pred : edges_bw[lut_gate]) + if (!elim_gates[lut_gate_pred]) + all_gate_preds_elim = false; + if (all_gate_preds_elim) + { + if (debug_relax) + log(" Removing gate %s from LUT.\n", log_signal(lut_gate)); + lut_gates[breaking_lut].erase(lut_gate); + for (auto lut_gate_succ : edges_fw[lut_gate]) + worklist.insert(lut_gate_succ); + } + } + log_assert(!lut_gates[breaking_lut].empty()); + + pool<RTLIL::SigBit> directly_affected_nodes = {breaking_lut}; + for (auto gate_input : gate_inputs) + { + if (debug_relax) + log(" Removing LUT edge %s -> %s.\n", log_signal(gate_input), log_signal(breaking_lut)); + remove_lut_edge(gate_input, breaking_lut, &directly_affected_nodes); + } + if (debug_relax) + log(" Adding LUT edge %s -> %s.\n", log_signal(breaking_gate), log_signal(breaking_lut)); + add_lut_edge(breaking_gate, breaking_lut, &directly_affected_nodes); + + if (debug_relax) + log(" Updating slack and potentials.\n"); + + pool<RTLIL::SigBit> indirectly_affected_nodes = {}; + update_lut_depths_altitudes(directly_affected_nodes, &indirectly_affected_nodes); + update_lut_critical_outputs(lut_critical_outputs, indirectly_affected_nodes); + for (auto node : indirectly_affected_nodes) + { + lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]); + log_assert(lut_slacks[node] >= 0); + if (debug_relax) + log(" LUT %s now has depth %d and slack %d.\n", log_signal(node), 
lut_depths[node], lut_slacks[node]); + } + + worklist = indirectly_affected_nodes; + pool<RTLIL::SigBit> visited; + while (!worklist.empty()) + { + auto node = worklist.pop(); + visited.insert(node); + potentials.erase(node); + // We are invalidating the entire output cone of the gate IR node, not just of the LUT IR node. This is done to also invalidate + // all LUTs that could contain one of the indirectly affected nodes as a *part* of them, as they may not be in the output cone + // of any of the LUT IR nodes, e.g. if we have a LUT IR node A and node B as predecessors of node C, where node B includes all + // gates from node A. + for (auto node_succ : edges_fw[node]) + if (!visited[node_succ]) + worklist.insert(node_succ); + } + + if (debug) + { + dump_dot_lut_graph(stringf("flowmap-relax-%d-break-%d.dot", depth_bound, break_num), GraphMode::Slack); + log(" Dumped slack graph after break %d to `flowmap-relax-%d-break-%d.dot`.\n", break_num, depth_bound, break_num); + } + } + + return false; + } + + void optimize_area(int depth, int optarea) + { + dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_critical_outputs; + update_lut_depths_altitudes(); + update_lut_critical_outputs(lut_critical_outputs); + + for (int depth_bound = depth; depth_bound <= depth + optarea; depth_bound++) + { + log("Relaxing with depth bound %d.\n", depth_bound); + bool fully_relaxed = relax_depth_for_bound(depth_bound == depth, depth_bound, lut_critical_outputs); + + if (fully_relaxed) + break; + } + } + + void pack_cells(int minlut) + { + ConstEval ce(module); + for (auto input_node : inputs) + ce.stop(input_node); + + pool<RTLIL::SigBit> mapped_nodes; + for (auto node : lut_nodes) + { + if (node_origins.count(node)) + { + auto origin = node_origins[node]; + if (origin.cell->getPort(origin.port).size() == 1) + log("Packing %s.%s.%s (%s).\n", + log_id(module), log_id(origin.cell), origin.port.c_str(), log_signal(node)); + else + log("Packing %s.%s.%s [%d] (%s).\n", + log_id(module), 
log_id(origin.cell), origin.port.c_str(), origin.offset, log_signal(node)); + } + else + { + log("Packing %s.%s.\n", log_id(module), log_signal(node)); + } + + for (auto gate_node : lut_gates[node]) + { + log_assert(node_origins.count(gate_node)); + + if (gate_node == node) + continue; + + auto gate_origin = node_origins[gate_node]; + if (gate_origin.cell->getPort(gate_origin.port).size() == 1) + log(" Packing %s.%s.%s (%s).\n", + log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), log_signal(gate_node)); + else + log(" Packing %s.%s.%s [%d] (%s).\n", + log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), gate_origin.offset, log_signal(gate_node)); + } + + vector<RTLIL::SigBit> input_nodes(lut_edges_bw[node].begin(), lut_edges_bw[node].end()); + RTLIL::Const lut_table(State::Sx, max(1 << input_nodes.size(), 1 << minlut)); + for (unsigned i = 0; i < (1 << input_nodes.size()); i++) + { + ce.push(); + for (size_t n = 0; n < input_nodes.size(); n++) + ce.set(input_nodes[n], ((i >> n) & 1) ? State::S1 : State::S0); + + RTLIL::SigSpec value = node, undef; + if (!ce.eval(value, undef)) + { + string env; + for (auto input_node : input_nodes) + env += stringf(" %s = %s\n", log_signal(input_node), log_signal(ce.values_map(input_node))); + log_error("Cannot evaluate %s because %s is not defined.\nEvaluation environment:\n%s", + log_signal(node), log_signal(undef), env.c_str()); + } + + lut_table[i] = value.as_bool() ? 
State::S1 : State::S0; + ce.pop(); + } + + RTLIL::SigSpec lut_a, lut_y = node; + for (auto input_node : input_nodes) + lut_a.append_bit(input_node); + lut_a.append(RTLIL::Const(State::Sx, minlut - input_nodes.size())); + + RTLIL::Cell *lut = module->addLut(NEW_ID, lut_a, lut_y, lut_table); + mapped_nodes.insert(node); + for (auto gate_node : lut_gates[node]) + { + auto gate_origin = node_origins[gate_node]; + lut->add_strpool_attribute("\\src", gate_origin.cell->get_strpool_attribute("\\src")); + packed_count++; + } + lut_count++; + lut_area += lut_table.size(); + + if ((int)input_nodes.size() >= minlut) + log(" Packed into a %zu-LUT %s.%s.\n", input_nodes.size(), log_id(module), log_id(lut)); + else + log(" Packed into a %zu-LUT %s.%s (implemented as %d-LUT).\n", input_nodes.size(), log_id(module), log_id(lut), minlut); + } + + for (auto node : mapped_nodes) + { + auto origin = node_origins[node]; + RTLIL::SigSpec driver = origin.cell->getPort(origin.port); + driver[origin.offset] = module->addWire(NEW_ID); + origin.cell->setPort(origin.port, driver); + } + } + + FlowmapWorker(int order, int minlut, pool<IdString> cell_types, int r_alpha, int r_beta, int r_gamma, + bool relax, int optarea, bool debug, bool debug_relax, + RTLIL::Module *module) : + order(order), r_alpha(r_alpha), r_beta(r_beta), r_gamma(r_gamma), debug(debug), debug_relax(debug_relax), + module(module), sigmap(module), index(module) + { + log("Labeling cells.\n"); + discover_nodes(cell_types); + label_nodes(); + int depth = map_luts(); + + if (relax) + { + log("\n"); + log("Optimizing area.\n"); + optimize_area(depth, optarea); + } + + log("\n"); + log("Packing cells.\n"); + pack_cells(minlut); + } +}; + +static void split(std::vector<std::string> &tokens, const std::string &text, char sep) +{ + size_t start = 0, end = 0; + while ((end = text.find(sep, start)) != std::string::npos) { + tokens.push_back(text.substr(start, end - start)); + start = end + 1; + } + tokens.push_back(text.substr(start)); 
+} + +struct FlowmapPass : public Pass { + FlowmapPass() : Pass("flowmap", "pack LUTs with FlowMap") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" flowmap [options] [selection]\n"); + log("\n"); + log("This pass uses the FlowMap technology mapping algorithm to pack logic gates\n"); + log("into k-LUTs with optimal depth. It allows mapping any circuit elements that can\n"); + log("be evaluated with the `eval` pass, including cells with multiple output ports\n"); + log("and multi-bit input and output ports.\n"); + log("\n"); + log(" -maxlut k\n"); + log(" perform technology mapping for a k-LUT architecture. if not specified,\n"); + log(" defaults to 3.\n"); + log("\n"); + log(" -minlut n\n"); + log(" only produce n-input or larger LUTs. if not specified, defaults to 1.\n"); + log("\n"); + log(" -cells <cell>[,<cell>,...]\n"); + log(" map specified cells. if not specified, maps $_NOT_, $_AND_, $_OR_,\n"); + log(" $_XOR_ and $_MUX_, which are the outputs of the `simplemap` pass.\n"); + log("\n"); + log(" -relax\n"); + log(" perform depth relaxation and area minimization.\n"); + log("\n"); + log(" -r-alpha n, -r-beta n, -r-gamma n\n"); + log(" parameters of depth relaxation heuristic potential function.\n"); + log(" if not specified, alpha=8, beta=2, gamma=1.\n"); + log("\n"); + log(" -optarea n\n"); + log(" optimize for area by trading off at most n logic levels for fewer LUTs.\n"); + log(" n may be zero, to optimize for area without increasing depth.\n"); + log(" implies -relax.\n"); + log("\n"); + log(" -debug\n"); + log(" dump intermediate graphs.\n"); + log("\n"); + log(" -debug-relax\n"); + log(" explain decisions performed during depth relaxation.\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + int order = 3; + int minlut = 1; + vector<string> cells; + bool relax = false; + int r_alpha = 8, r_beta = 2, r_gamma 
= 1; + int optarea = 0; + bool debug = false, debug_relax = false; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + if (args[argidx] == "-maxlut" && argidx + 1 < args.size()) + { + order = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-minlut" && argidx + 1 < args.size()) + { + minlut = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-cells" && argidx + 1 < args.size()) + { + split(cells, args[++argidx], ','); + continue; + } + if (args[argidx] == "-relax") + { + relax = true; + continue; + } + if (args[argidx] == "-r-alpha" && argidx + 1 < args.size()) + { + r_alpha = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-r-beta" && argidx + 1 < args.size()) + { + r_beta = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-r-gamma" && argidx + 1 < args.size()) + { + r_gamma = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-optarea" && argidx + 1 < args.size()) + { + relax = true; + optarea = atoi(args[++argidx].c_str()); + continue; + } + if (args[argidx] == "-debug") + { + debug = true; + continue; + } + if (args[argidx] == "-debug-relax") + { + debug = debug_relax = true; + continue; + } + break; + } + extra_args(args, argidx, design); + + pool<IdString> cell_types; + if (!cells.empty()) + { + for (auto &cell : cells) + cell_types.insert(cell); + } + else + { + cell_types = {"$_NOT_", "$_AND_", "$_OR_", "$_XOR_", "$_MUX_"}; + } + + const char *algo_r = relax ? 
"-r" : ""; + log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap%s).\n", algo_r); + + int gate_count = 0, lut_count = 0, packed_count = 0; + int gate_area = 0, lut_area = 0; + for (auto module : design->selected_modules()) + { + FlowmapWorker worker(order, minlut, cell_types, r_alpha, r_beta, r_gamma, relax, optarea, debug, debug_relax, module); + gate_count += worker.gate_count; + lut_count += worker.lut_count; + packed_count += worker.packed_count; + gate_area += worker.gate_area; + lut_area += worker.lut_area; + } + + log("\n"); + log("Packed %d cells (%d of them duplicated) into %d LUTs.\n", packed_count, packed_count - gate_count, lut_count); + log("Solution takes %.1f%% of original gate area.\n", lut_area * 100.0 / gate_area); + } +} FlowmapPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/tests/flowmap/flow.v b/passes/tests/flowmap/flow.v new file mode 100644 index 000000000..297ef910e --- /dev/null +++ b/passes/tests/flowmap/flow.v @@ -0,0 +1,22 @@ +// Exact reproduction of Figure 2(a) from 10.1109/43.273754. +module top(...); + input a,b,c,d,e,f; + wire nA = b&c; + wire A = !nA; + wire nB = c|d; + wire B = !nB; + wire nC = e&f; + wire C = !nC; + wire D = A|B; + wire E = a&D; + wire nF = D&C; + wire F = !nF; + wire nG = F|B; + wire G = !nG; + wire H = a&F; + wire I = E|G; + wire J = G&C; + wire np = H&I; + output p = !np; + output q = A|J; +endmodule diff --git a/passes/tests/flowmap/flowp.v b/passes/tests/flowmap/flowp.v new file mode 100644 index 000000000..2fb40ffa4 --- /dev/null +++ b/passes/tests/flowmap/flowp.v @@ -0,0 +1,16 @@ +// Like flow.v, but results in a network identical to Figure 2(b). 
// Body of flowp.v: same gate topology as flow.v, with every negated intermediate
// (nA/A, nB/B, ...) collapsed into the plain AND/OR gate.
module top(...);
	input a,b,c,d,e,f;
	wire A = b&c;
	wire B = c|d;
	wire C = e&f;
	wire D = A|B;
	wire E = a&D;
	wire F = D&C;
	wire G = F|B;
	wire H = a&F;
	wire I = E|G;
	wire J = G&C;
	output p = H&I;
	output q = A|J;
endmodule
diff --git a/passes/tests/flowmap/pack1.v b/passes/tests/flowmap/pack1.v new file mode 100644 index 000000000..9454edf3c --- /dev/null +++ b/passes/tests/flowmap/pack1.v @@ -0,0 +1,11 @@
// Exact reproduction of Figure 3(a) from 10.1109/92.285741.
// Every gate is an inverted AND/OR; `v` fans out to both outputs.
module top(...);
	input a,b,c,d,e,f,g,h;
	wire x = !(c|d);
	wire y = !(e&f);
	wire u = !(a&b);
	wire v = !(x|y);
	wire w = !(g&h);
	output s = !(u|v);
	output t = !(v|w);
endmodule
diff --git a/passes/tests/flowmap/pack1p.v b/passes/tests/flowmap/pack1p.v new file mode 100644 index 000000000..fdb278833 --- /dev/null +++ b/passes/tests/flowmap/pack1p.v @@ -0,0 +1,11 @@
// Like pack1.v, but results in a simpler network.
// Same connectivity as pack1.v with the inversions dropped.
module top(...);
	input a,b,c,d,e,f,g,h;
	wire x = c|d;
	wire y = e&f;
	wire u = a&b;
	wire v = x|y;
	wire w = g&h;
	output s = u|v;
	output t = v|w;
endmodule
diff --git a/passes/tests/flowmap/pack2.v b/passes/tests/flowmap/pack2.v new file mode 100644 index 000000000..445e4afb0 --- /dev/null +++ b/passes/tests/flowmap/pack2.v @@ -0,0 +1,15 @@
// Exact reproduction of Figure 4(a) from 10.1109/92.285741.
// The $flowmap_level attribute on an input wire is read by flowmap's label_nodes as that
// input's initial label (inputs without it start at 0).
module top(...);
	(* $flowmap_level=1 *) input a;
	(* $flowmap_level=1 *) input b;
	(* $flowmap_level=2 *) input c;
	(* $flowmap_level=1 *) input d;
	(* $flowmap_level=3 *) input e;
	(* $flowmap_level=1 *) input f;
	wire u = !(a&b);
	wire w = !(c|d);
	wire v = !(u|w);
	wire n0 = !(w&e);
	wire n1 = !(n0|f);
	output n2 = !(v&n1);
endmodule
diff --git a/passes/tests/flowmap/pack2p.v b/passes/tests/flowmap/pack2p.v new file mode 100644 index 000000000..d4b41733d --- /dev/null +++ b/passes/tests/flowmap/pack2p.v @@ -0,0 +1,15 @@
// Like pack2.v, but results in a simpler network.
// Body of pack2p.v: same connectivity and input levels as pack2.v with the inversions dropped.
module top(...);
	(* $flowmap_level=1 *) input a;
	(* $flowmap_level=1 *) input b;
	(* $flowmap_level=2 *) input c;
	(* $flowmap_level=1 *) input d;
	(* $flowmap_level=3 *) input e;
	(* $flowmap_level=1 *) input f;
	wire u = a&b;
	wire w = c|d;
	wire v = u|w;
	wire n0 = w&e;
	wire n1 = n0|f;
	output n2 = v&n1;
endmodule
diff --git a/passes/tests/flowmap/pack3.v b/passes/tests/flowmap/pack3.v new file mode 100644 index 000000000..06147a1aa --- /dev/null +++ b/passes/tests/flowmap/pack3.v @@ -0,0 +1,15 @@
// Exact reproduction of Figure 5(a) (bottom) from 10.1109/92.285741.
module top(...);
	input a,b,c,d,e,f,g,h,i,j;
	wire x = !(a&b);
	wire y = !(c|d);
	wire z = !(e|f);
	wire n0 = !(g&h);
	wire n1 = !(i|j);
	wire w = !(x&y);
	wire n2 = !(z&n0);
	wire n3 = !(n0|n1);
	wire n4 = !(n2|n3);
	// NOTE(review): this used to read the undeclared net `n5`, which left n4 and the whole
	// e..j input cone unused; assumed to be a typo for n4 -- confirm against the figure.
	wire v = !(w|n4);
	output u = !(w&v);
endmodule
diff --git a/passes/tests/flowmap/pack3p.v b/passes/tests/flowmap/pack3p.v new file mode 100644 index 000000000..bc6ac1757 --- /dev/null +++ b/passes/tests/flowmap/pack3p.v @@ -0,0 +1,15 @@
// Like pack3.v, but results in a simpler network.
module top(...);
	input a,b,c,d,e,f,g,h,i,j;
	wire x = a&b;
	wire y = c|d;
	wire z = e|f;
	wire n0 = g&h;
	wire n1 = i|j;
	wire w = x&y;
	wire n2 = z&n0;
	wire n3 = n0|n1;
	wire n4 = n2|n3;
	// NOTE(review): same assumed n5 -> n4 typo fix as in pack3.v -- confirm.
	wire v = w|n4;
	output u = w&v;
endmodule
|