30 files changed, 2509 insertions, 144 deletions
diff --git a/passes/cmds/Makefile.inc b/passes/cmds/Makefile.inc
index 44a83b2b9..c8067a8be 100644
--- a/passes/cmds/Makefile.inc
+++ b/passes/cmds/Makefile.inc
@@ -29,4 +29,4 @@ OBJS += passes/cmds/chformal.o
 OBJS += passes/cmds/chtype.o
 OBJS += passes/cmds/blackbox.o
 OBJS += passes/cmds/ltp.o
-
+OBJS += passes/cmds/bugpoint.o
diff --git a/passes/cmds/bugpoint.cc b/passes/cmds/bugpoint.cc
new file mode 100644
index 000000000..606276e64
--- /dev/null
+++ b/passes/cmds/bugpoint.cc
@@ -0,0 +1,369 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2018  whitequark <whitequark@whitequark.org>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "kernel/yosys.h"
+#include "backends/ilang/ilang_backend.h"
+
+USING_YOSYS_NAMESPACE
+using namespace ILANG_BACKEND;
+PRIVATE_NAMESPACE_BEGIN
+
+struct BugpointPass : public Pass {
+	BugpointPass() : Pass("bugpoint", "minimize testcases") { }
+	void help() YS_OVERRIDE
+	{ // Emits the usage text for the bugpoint command through log(); it does nothing else.
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    bugpoint [options]\n");
+		log("\n");
+		log("This command minimizes testcases that crash Yosys. It removes an arbitrary part\n");
+		log("of the design and recursively invokes Yosys with a given script, repeating these\n");
+		log("steps while it can find a smaller design that still causes a crash. Once this\n");
+		log("command finishes, it replaces the current design with the smallest testcase it\n");
+		log("was able to produce.\n");
+		log("\n");
+		log("It is possible to specify the kinds of design part that will be removed. If none\n");
+		log("are specified, all parts of design will be removed.\n");
+		log("\n");
+		log("    -yosys <filename>\n");
+		log("        use this Yosys binary. if not specified, `yosys` is used.\n");
+		log("\n");
+		log("    -script <filename>\n");
+		log("        use this script to crash Yosys. required.\n");
+		log("\n");
+		log("    -grep <string>\n");
+		log("        only consider crashes that place this string in the log file.\n");
+		log("\n");
+		log("    -fast\n");
+		log("        run `clean -purge` after each minimization step. converges faster, but\n");
+		log("        produces larger testcases, and may fail to produce any testcase at all if\n");
+		log("        the crash is related to dangling wires.\n");
+		log("\n");
+		log("    -clean\n");
+		log("        run `clean -purge` before checking testcase and after finishing. produces\n");
+		log("        smaller and more useful testcases, but may fail to produce any testcase\n");
+		log("        at all if the crash is related to dangling wires.\n");
+		log("\n");
+		log("    -modules\n");
+		log("        try to remove modules.\n");
+		log("\n");
+		log("    -ports\n");
+		log("        try to remove module ports.\n");
+		log("\n");
+		log("    -cells\n");
+		log("        try to remove cells.\n");
+		log("\n");
+		log("    -connections\n");
+		log("        try to reconnect ports to 'x.\n");
+		log("\n");
+	}
+
+	// Dump `design` to bugpoint-case.il, run `yosys_cmd` on it with `script`; true iff run_command() yields 0.
+	bool run_yosys(RTLIL::Design *design, string yosys_cmd, string script)
+	{
+		design->sort();
+		{
+			std::ofstream il_file("bugpoint-case.il");
+			ILANG_BACKEND::dump_design(il_file, design, /*only_selected=*/false, /*flag_m=*/true, /*flag_n=*/false);
+		} // stream is closed here, before the child process reads the file
+		const string cmdline = stringf("%s -qq -L bugpoint-case.log -s %s bugpoint-case.il", yosys_cmd.c_str(), script.c_str());
+		return run_command(cmdline) == 0;
+	}
+
+	// Return true if `grep` is empty or occurs in some line of bugpoint-case.log.
+	// A missing or unreadable log file yields no lines, so the result is false.
+	bool check_logfile(string grep)
+	{
+		if (grep.empty())
+			return true;
+
+		std::ifstream f("bugpoint-case.log");
+		string line;
+		// Test getline() itself: eof() is never set when the file failed to open.
+		while (getline(f, line))
+			if (line.find(grep) != std::string::npos)
+				return true;
+		return false;
+	}
+
+	// Returns `design` itself when !do_clean; else a `clean -purge`d clone (input deleted if do_delete).
+	RTLIL::Design *clean_design(RTLIL::Design *design, bool do_clean = true, bool do_delete = false)
+	{
+		if (do_clean)
+		{
+			RTLIL::Design *purged = new RTLIL::Design;
+			for (auto &entry : design->modules_)
+				purged->add(entry.second->clone());
+			Pass::call(purged, "clean -purge");
+			if (do_delete) delete design;
+			design = purged;
+		}
+		return design;
+	}
+
+	// Clone `design` and apply exactly one simplification to the clone: the candidate whose
+	// running index (counted over the enabled kinds, in the order below) equals `seed`.
+	// Returns the modified clone, owned by the caller, or NULL when there is no such
+	// candidate. Wires tagged $bugpoint and port-connected cell ports are skipped unless stage2.
+	RTLIL::Design *simplify_something(RTLIL::Design *design, int &seed, bool stage2, bool modules, bool ports, bool cells, bool connections)
+	{
+		RTLIL::Design *design_copy = new RTLIL::Design;
+		for (auto &it : design->modules_)
+			design_copy->add(it.second->clone());
+		int index = 0;
+		if (modules)
+		{
+			for (auto &it : design_copy->modules_)
+			{
+				if (it.second->get_bool_attribute("\\blackbox"))
+					continue;
+				if (index++ == seed)
+				{
+					log("Trying to remove module %s.\n", it.first.c_str());
+					design_copy->remove(it.second);
+					return design_copy;
+				}
+			}
+		}
+		if (ports)
+		{
+			for (auto mod : design_copy->modules())
+			{
+				if (mod->get_bool_attribute("\\blackbox"))
+					continue;
+				for (auto wire : mod->wires())
+				{
+					// ports introduced by the "expose" step below stay in place until stage 2
+					if (!stage2 && wire->get_bool_attribute("$bugpoint"))
+						continue;
+					if (wire->port_input || wire->port_output)
+					{
+						if (index++ == seed)
+						{
+							log("Trying to remove module port %s.\n", log_signal(wire));
+							wire->port_input = wire->port_output = false;
+							mod->fixup_ports();
+							return design_copy;
+						}
+					}
+				}
+			}
+		}
+		if (cells)
+		{
+			for (auto mod : design_copy->modules())
+			{
+				if (mod->get_bool_attribute("\\blackbox"))
+					continue;
+				for (auto &it : mod->cells_)
+				{
+					if (index++ == seed)
+					{
+						log("Trying to remove cell %s.%s.\n", mod->name.c_str(), it.first.c_str());
+						mod->remove(it.second);
+						return design_copy;
+					}
+				}
+			}
+		}
+		if (connections)
+		{
+			for (auto mod : design_copy->modules())
+			{
+				if (mod->get_bool_attribute("\\blackbox"))
+					continue;
+				for (auto cell : mod->cells())
+				{
+					// iterate by value on purpose: the port is unset and re-set below, modifying connections_
+					for (auto it : cell->connections_)
+					{
+						RTLIL::SigSpec port = cell->getPort(it.first);
+						bool is_undef = port.is_fully_undef();
+						bool is_port = port.is_wire() && (port.as_wire()->port_input || port.as_wire()->port_output);
+						if(is_undef || (!stage2 && is_port))
+							continue;
+						// candidate: tie the cell port off to constant 'x
+						if (index++ == seed)
+						{
+							log("Trying to remove cell port %s.%s.%s.\n", mod->name.c_str(), cell->name.c_str(), it.first.c_str());
+							RTLIL::SigSpec port_x(State::Sx, port.size());
+							cell->unsetPort(it.first);
+							cell->setPort(it.first, port_x);
+							return design_copy;
+						}
+						// candidate (stage 1 only): reconnect the cell port to a fresh module port
+						if (!stage2 && (cell->input(it.first) || cell->output(it.first)) && index++ == seed)
+						{
+							log("Trying to expose cell port %s.%s.%s as module port.\n", mod->name.c_str(), cell->name.c_str(), it.first.c_str());
+							RTLIL::Wire *wire = mod->addWire(NEW_ID, port.size());
+							wire->set_bool_attribute("$bugpoint");
+							wire->port_input = cell->input(it.first);
+							wire->port_output = cell->output(it.first);
+							cell->unsetPort(it.first);
+							cell->setPort(it.first, wire);
+							mod->fixup_ports();
+							return design_copy;
+						}
+					}
+				}
+			}
+		}
+		delete design_copy; // no candidate matched: do not leak the unused clone
+		return NULL;
+	}
+
+	// Entry point: parse options, confirm the unmodified design makes the child Yosys fail,
+	// then repeatedly keep any simplification that still fails (and matches -grep) until none
+	// is left; finally replace the contents of `design` with the smallest failing testcase.
+	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+	{
+		string yosys_cmd = "yosys", script, grep;
+		bool fast = false, clean = false;
+		bool modules = false, ports = false, cells = false, connections = false, has_part = false;
+
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++)
+		{
+			if (args[argidx] == "-yosys" && argidx + 1 < args.size()) {
+				yosys_cmd = args[++argidx];
+				continue;
+			}
+			if (args[argidx] == "-script" && argidx + 1 < args.size()) {
+				script = args[++argidx];
+				continue;
+			}
+			if (args[argidx] == "-grep" && argidx + 1 < args.size()) {
+				grep = args[++argidx];
+				continue;
+			}
+			if (args[argidx] == "-fast") {
+				fast = true;
+				continue;
+			}
+			if (args[argidx] == "-clean") {
+				clean = true;
+				continue;
+			}
+			if (args[argidx] == "-modules") {
+				modules = true;
+				has_part = true;
+				continue;
+			}
+			if (args[argidx] == "-ports") {
+				ports = true;
+				has_part = true;
+				continue;
+			}
+			if (args[argidx] == "-cells") {
+				cells = true;
+				has_part = true;
+				continue;
+			}
+			if (args[argidx] == "-connections") {
+				connections = true;
+				has_part = true;
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+		if (script.empty()) // documented as required; without it the child command line is malformed
+			log_cmd_error("Missing -script option.\n");
+		if (!has_part) // no kind selected: try every kind of simplification
+			modules = ports = cells = connections = true;
+
+		if (!design->full_selection())
+			log_cmd_error("This command only operates on fully selected designs!\n");
+
+		RTLIL::Design *crashing_design = clean_design(design, clean);
+		if (run_yosys(crashing_design, yosys_cmd, script))
+			log_cmd_error("The provided script file and Yosys binary do not crash on this design!\n");
+		if (!check_logfile(grep))
+			log_cmd_error("The provided grep string is not found in the log file!\n");
+
+		int seed = 0, crashing_seed = seed;
+		bool found_something = false, stage2 = false;
+		while (true)
+		{
+			if (RTLIL::Design *simplified = simplify_something(crashing_design, seed, stage2, modules, ports, cells, connections))
+			{
+				simplified = clean_design(simplified, fast, /*do_delete=*/true);
+
+				bool crashes;
+				if (clean)
+				{
+					RTLIL::Design *testcase = clean_design(simplified);
+					crashes = !run_yosys(testcase, yosys_cmd, script);
+					delete testcase;
+				}
+				else
+				{
+					crashes = !run_yosys(simplified, yosys_cmd, script);
+				}
+
+				if (crashes && check_logfile(grep))
+				{
+					log("Testcase crashes.\n");
+					if (crashing_design != design)
+						delete crashing_design;
+					crashing_design = simplified;
+					crashing_seed = seed;
+					found_something = true;
+				}
+				else
+				{
+					log("Testcase does not crash.\n");
+					delete simplified;
+					seed++;
+				}
+			}
+			else
+			{
+				seed = 0; // candidates at this seed are exhausted: start over from the first one
+				if (found_something)
+					found_something = false;
+				else
+				{
+					if (!stage2)
+					{
+						log("Demoting introduced module ports.\n");
+						stage2 = true;
+					}
+					else
+					{
+						log("Simplifications exhausted.\n");
+						break;
+					}
+				}
+			}
+		}
+
+		if (crashing_design != design)
+		{
+			Pass::call(design, "design -reset");
+			crashing_design = clean_design(crashing_design, clean, /*do_delete=*/true);
+			for (auto &it : crashing_design->modules_)
+				design->add(it.second->clone());
+			delete crashing_design;
+		}
+	}
+} BugpointPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/cmds/chformal.cc b/passes/cmds/chformal.cc
index 522758eae..7e32da65f 100644
--- a/passes/cmds/chformal.cc
+++ b/passes/cmds/chformal.cc
@@ -32,7 +32,7 @@ struct ChformalPass : public Pass {
 		log("    chformal [types] [mode] [options] [selection]\n");
 		log("\n");
 		log("Make changes to the formal constraints of the design. The [types] options\n");
-		log("the type of constraint to operate on. If none of the folling options is given,\n");
+		log("the type of constraint to operate on. If none of the following options are given,\n");
 		log("the command will operate on all constraint types:\n");
 		log("\n");
 		log("    -assert       $assert cells, representing assert(...) constraints\n");
@@ -59,7 +59,7 @@ struct ChformalPass : public Pass {
 		log("    -assume2assert\n");
 		log("    -live2fair\n");
 		log("    -fair2live\n");
-		log("        change the roles of cells as indicated. this options can be combined\n");
+		log("        change the roles of cells as indicated. these options can be combined\n");
 		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
diff --git a/passes/cmds/connect.cc b/passes/cmds/connect.cc
index d480b79ac..f93bada27 100644
--- a/passes/cmds/connect.cc
+++ b/passes/cmds/connect.cc
@@ -137,7 +137,7 @@ struct ConnectPass : public Pass {
 		if (!set_lhs.empty())
 		{
 			if (!unset_expr.empty() || !port_cell.empty())
-				log_cmd_error("Cant use -set together with -unset and/or -port.\n");
+				log_cmd_error("Can't use -set together with -unset and/or -port.\n");
 
 			RTLIL::SigSpec sig_lhs, sig_rhs;
 			if (!RTLIL::SigSpec::parse_sel(sig_lhs, design, module, set_lhs))
@@ -157,7 +157,7 @@ struct ConnectPass : public Pass {
 		if (!unset_expr.empty())
 		{
 			if (!port_cell.empty() || flag_nounset)
-				log_cmd_error("Cant use -unset together with -port and/or -nounset.\n");
+				log_cmd_error("Can't use -unset together with -port and/or -nounset.\n");
 
 			RTLIL::SigSpec sig;
 			if (!RTLIL::SigSpec::parse_sel(sig, design, module, unset_expr))
@@ -170,7 +170,7 @@ struct ConnectPass : public Pass {
 		if (!port_cell.empty())
 		{
 			if (flag_nounset)
-				log_cmd_error("Cant use -port together with -nounset.\n");
+				log_cmd_error("Can't use -port together with -nounset.\n");
 
 			if (module->cells_.count(RTLIL::escape_id(port_cell)) == 0)
 				log_cmd_error("Can't find cell %s.\n", port_cell.c_str());
diff --git a/passes/cmds/rename.cc b/passes/cmds/rename.cc
index 4b4af0a40..698ce7235 100644
--- a/passes/cmds/rename.cc
+++ b/passes/cmds/rename.cc
@@ -61,6 +61,42 @@ static std::string derive_name_from_src(const std::string &src, int counter)
 		return stringf("\\%s$%d", src_base.c_str(), counter);
 }
 
+// Derive a name for `cell` from the wire(s) driven by its single output port: the chunk
+// wire names joined by '$', each followed by a [hi:lo] (or [bit]) offset suffix when the
+// chunk covers only part of its wire, then the cell type. Returns cell.name unchanged when
+// the cell does not have exactly one output port or a chunk has no or a '$'-named wire.
+static IdString derive_name_from_wire(const RTLIL::Cell &cell)
+{
+	const SigSpec *output = nullptr;
+	int num_outputs = 0;
+	for (auto &connection : cell.connections()) {
+		if (cell.output(connection.first)) {
+			output = &connection.second;
+			num_outputs++;
+		}
+	}
+	if (num_outputs != 1) // Skip cells that do not drive exactly one output port
+		return cell.name;
+
+	std::string name;
+	for (auto &chunk : output->chunks()) {
+		// Skip cells whose output has a wire-less chunk or drives a privately named wire
+		if (!chunk.wire || chunk.wire->name.str()[0] == '$')
+			return cell.name;
+		if (!name.empty())
+			name += "$";
+		name += chunk.wire->name.str();
+		if (chunk.wire->width != chunk.width) {
+			name += "[";
+			if (chunk.width != 1) // upper bound is inclusive, hence the -1
+				name += std::to_string(chunk.offset + chunk.width - 1) + ":";
+			name += std::to_string(chunk.offset) + "]";
+		}
+	}
+
+	return name + cell.type.str();
+}
+
 struct RenamePass : public Pass {
 	RenamePass() : Pass("rename", "rename object in the design") { }
 	void help() YS_OVERRIDE
@@ -77,6 +113,10 @@ struct RenamePass : public Pass {
 		log("Assign names auto-generated from the src attribute to all selected wires and\n");
 		log("cells with private names.\n");
 		log("\n");
+		log("    rename -wire [selection]\n");
+		log("Assign auto-generated names based on the wires they drive to all selected\n");
+		log("cells with private names. Ignores cells driving privatly named wires.\n");
+		log("\n");
 		log("    rename -enumerate [-pattern <pattern>] [selection]\n");
 		log("\n");
 		log("Assign short auto-generated names to all selected wires and cells with private\n");
@@ -98,6 +138,7 @@ struct RenamePass : public Pass {
 	{
 		std::string pattern_prefix = "_", pattern_suffix = "_";
 		bool flag_src = false;
+		bool flag_wire = false;
 		bool flag_enumerate = false;
 		bool flag_hide = false;
 		bool flag_top = false;
@@ -112,6 +153,11 @@ struct RenamePass : public Pass {
 				got_mode = true;
 				continue;
 			}
+			if (arg == "-wire" && !got_mode) {
+				flag_wire = true;
+				got_mode = true;
+				continue;
+			}
 			if (arg == "-enumerate" && !got_mode) {
 				flag_enumerate = true;
 				got_mode = true;
@@ -167,6 +213,26 @@ struct RenamePass : public Pass {
 			}
 		}
 		else
+		if (flag_wire)
+		{
+			extra_args(args, argidx, design);
+
+			for (auto &mod : design->modules_)
+			{
+				RTLIL::Module *module = mod.second;
+				if (!design->selected(module))
+					continue;
+
+				dict<RTLIL::IdString, RTLIL::Cell*> new_cells;
+				for (auto &it : module->cells_) {
+					if (it.first[0] == '$' && design->selected(module, it.second))
+						it.second->name = derive_name_from_wire(*it.second);
+					new_cells[it.second->name] = it.second;
+				}
+				module->cells_.swap(new_cells);
+			}
+		}
+		else
 		if (flag_enumerate)
 		{
 			extra_args(args, argidx, design);
diff --git a/passes/cmds/select.cc b/passes/cmds/select.cc
index ba407ea8c..b5e8ef1af 100644
--- a/passes/cmds/select.cc
+++ b/passes/cmds/select.cc
@@ -987,7 +987,7 @@ struct SelectPass : public Pass {
 		log("list of selected objects.\n");
 		log("\n");
 		log("Note that many commands support an optional [selection] argument that can be\n");
-		log("used to YS_OVERRIDE the global selection for the command. The syntax of this\n");
+		log("used to override the global selection for the command. The syntax of this\n");
 		log("optional argument is identical to the syntax of the <selection> argument\n");
 		log("described here.\n");
 		log("\n");
diff --git a/passes/cmds/setundef.cc b/passes/cmds/setundef.cc
index a1dfa9b5c..56ef2d125 100644
--- a/passes/cmds/setundef.cc
+++ b/passes/cmds/setundef.cc
@@ -137,7 +137,7 @@ struct SetundefPass : public Pass {
 		log("        replace with $anyconst drivers (for formal)\n");
 		log("\n");
 		log("    -random <seed>\n");
-		log("        replace with random bits using the specified integer als seed\n");
+		log("        replace with random bits using the specified integer as seed\n");
 		log("        value for the random number generator.\n");
 		log("\n");
 		log("    -init\n");
diff --git a/passes/cmds/show.cc b/passes/cmds/show.cc
index a48873244..58acd302d 100644
--- a/passes/cmds/show.cc
+++ b/passes/cmds/show.cc
@@ -623,7 +623,7 @@ struct ShowPass : public Pass {
 		log("        assigned to each unique value of this attribute.\n");
 		log("\n");
 		log("    -width\n");
-		log("        annotate busses with a label indicating the width of the bus.\n");
+		log("        annotate buses with a label indicating the width of the bus.\n");
 		log("\n");
 		log("    -signed\n");
 		log("        mark ports (A, B) that are declared as signed (using the [AB]_SIGNED\n");
diff --git a/passes/cmds/tee.cc b/passes/cmds/tee.cc
index ff80f3859..ee96ace86 100644
--- a/passes/cmds/tee.cc
+++ b/passes/cmds/tee.cc
@@ -37,7 +37,7 @@ struct TeePass : public Pass {
 		log("specified logfile(s).\n");
 		log("\n");
 		log("    -q\n");
-		log("        Do not print output to the normal destination (console and/or log file)\n");
+		log("        Do not print output to the normal destination (console and/or log file).\n");
 		log("\n");
 		log("    -o logfile\n");
 		log("        Write output to this file, truncate if exists.\n");
@@ -46,7 +46,7 @@ struct TeePass : public Pass {
 		log("        Write output to this file, append if exists.\n");
 		log("\n");
 		log("    +INT, -INT\n");
-		log("        Add/subract INT from the -v setting for this command.\n");
+		log("        Add/subtract INT from the -v setting for this command.\n");
 		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
diff --git a/passes/fsm/fsm_detect.cc b/passes/fsm/fsm_detect.cc
index fc504e98c..5ae991b28 100644
--- a/passes/fsm/fsm_detect.cc
+++ b/passes/fsm/fsm_detect.cc
@@ -196,13 +196,13 @@ static void detect_fsm(RTLIL::Wire *wire)
 		vector<string> warnings;
 
 		if (is_module_port)
-			warnings.push_back("Forcing fsm recoding on module port might result in larger circuit.\n");
+			warnings.push_back("Forcing FSM recoding on module port might result in larger circuit.\n");
 
 		if (!looks_like_good_state_reg)
-			warnings.push_back("Users of state reg look like fsm recoding might result in larger circuit.\n");
+			warnings.push_back("Users of state reg look like FSM recoding might result in larger circuit.\n");
 
 		if (has_init_attr)
-			warnings.push_back("Init value on fsm state registers are ignored. Possible simulation-synthesis mismatch!");
+			warnings.push_back("Initialization value on FSM state register is ignored. Possible simulation-synthesis mismatch!\n");
 
 		if (!looks_like_state_reg)
 			warnings.push_back("Doesn't look like a proper FSM. Possible simulation-synthesis mismatch!\n");
@@ -236,7 +236,7 @@ static void detect_fsm(RTLIL::Wire *wire)
 			log("    Users of register don't seem to benefit from recoding.\n");
 
 		if (has_init_attr)
-			log("    Register has an initialization value.");
+			log("    Register has an initialization value.\n");
 
 		if (is_self_resetting)
 			log("    Circuit seems to be self-resetting.\n");
diff --git a/passes/fsm/fsm_extract.cc b/passes/fsm/fsm_extract.cc
index 67551f673..6095eaf30 100644
--- a/passes/fsm/fsm_extract.cc
+++ b/passes/fsm/fsm_extract.cc
@@ -178,7 +178,7 @@ undef_bit_in_next_state:
 			log_state_in = fsm_data.state_table.at(state_in);
 
 		if (states.count(ce.values_map(ce.assign_map(dff_in)).as_const()) == 0) {
-			log("  transition: %10s %s -> INVALID_STATE(%s) %s  <ignored invalid transistion!>%s\n",
+			log("  transition: %10s %s -> INVALID_STATE(%s) %s  <ignored invalid transition!>%s\n",
 					log_signal(log_state_in), log_signal(tr.ctrl_in),
 					log_signal(ce.values_map(ce.assign_map(dff_in))), log_signal(tr.ctrl_out),
 					undef_bit_in_next_state_mode ? " SHORTENED" : "");
@@ -194,7 +194,7 @@ undef_bit_in_next_state:
 					log_signal(log_state_in), log_signal(tr.ctrl_in),
 					log_signal(fsm_data.state_table[tr.state_out]), log_signal(tr.ctrl_out));
 		} else {
-			log("  transition: %10s %s -> %10s %s  <ignored undef transistion!>\n",
+			log("  transition: %10s %s -> %10s %s  <ignored undef transition!>\n",
 					log_signal(log_state_in), log_signal(tr.ctrl_in),
 					log_signal(fsm_data.state_table[tr.state_out]), log_signal(tr.ctrl_out));
 		}
diff --git a/passes/hierarchy/hierarchy.cc b/passes/hierarchy/hierarchy.cc
index 0c782b8ab..0e28dbca2 100644
--- a/passes/hierarchy/hierarchy.cc
+++ b/passes/hierarchy/hierarchy.cc
@@ -543,7 +543,7 @@ struct HierarchyPass : public Pass {
 		log("        an unknown module is used as cell type.\n");
 		log("\n");
 		log("    -simcheck\n");
-		log("        like -check, but also thow an error if blackbox modules are\n");
+		log("        like -check, but also throw an error if blackbox modules are\n");
 		log("        instantiated, and throw an error if the design has no top module\n");
 		log("\n");
 		log("    -purge_lib\n");
diff --git a/passes/memory/memory_collect.cc b/passes/memory/memory_collect.cc
index 70d98713c..369fcc84e 100644
--- a/passes/memory/memory_collect.cc
+++ b/passes/memory/memory_collect.cc
@@ -184,9 +184,6 @@ Cell *handle_memory(Module *module, RTLIL::Memory *memory)
 	mem->parameters["\\OFFSET"] = Const(memory->start_offset);
 	mem->parameters["\\SIZE"] = Const(memory->size);
 	mem->parameters["\\ABITS"] = Const(addr_bits);
-
-	while (GetSize(init_data) > 1 && init_data.bits.back() == State::Sx && init_data.bits[GetSize(init_data)-2] == State::Sx)
-		init_data.bits.pop_back();
 	mem->parameters["\\INIT"] = init_data;
 
 	log_assert(sig_wr_clk.size() == wr_ports);
diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc
index 0f596b1f4..c3e0a2a40 100644
--- a/passes/opt/Makefile.inc
+++ b/passes/opt/Makefile.inc
@@ -6,12 +6,12 @@ OBJS += passes/opt/opt_reduce.o
 OBJS += passes/opt/opt_rmdff.o
 OBJS += passes/opt/opt_clean.o
 OBJS += passes/opt/opt_expr.o
-OBJS += passes/opt/opt_lut.o
 
 ifneq ($(SMALL),1)
 OBJS += passes/opt/share.o
 OBJS += passes/opt/wreduce.o
 OBJS += passes/opt/opt_demorgan.o
 OBJS += passes/opt/rmports.o
+OBJS += passes/opt/opt_lut.o
 endif
 
diff --git a/passes/opt/opt_expr.cc b/passes/opt/opt_expr.cc
index 610edc5e9..26a3ca7bc 100644
--- a/passes/opt/opt_expr.cc
+++ b/passes/opt/opt_expr.cc
@@ -259,6 +259,22 @@ bool is_one_or_minus_one(const Const &value, bool is_signed, bool &is_negative)
 	return last_bit_one;
 }
 
+// Index of the first constant-1 bit found scanning from the MSB while skipping constant-0
+// bits; -1 if the scan runs out of bits or stops at any bit that is neither 0 nor 1.
+int get_highest_hot_index(RTLIL::SigSpec signal)
+{
+	int pos = GetSize(signal);
+	while (--pos >= 0)
+	{
+		if (signal[pos] == RTLIL::State::S1)
+			return pos;
+		if (!(signal[pos] == RTLIL::State::S0))
+			return -1;
+	}
+
+	return -1;
+}
+
 // if the signal has only one bit set, return the index of that bit.
 // otherwise return -1
 int get_onehot_bit_index(RTLIL::SigSpec signal)
@@ -1344,118 +1360,139 @@ void replace_const_cells(RTLIL::Design *design, RTLIL::Module *module, bool cons
 			}
 		}
 
-		// replace a<0 or a>=0 with the top bit of a
+		// simplify comparisons
 		if (do_fine && (cell->type == "$lt" || cell->type == "$ge" || cell->type == "$gt" || cell->type == "$le"))
 		{
-			//used to decide whether the signal needs to be negated
-			bool is_lt = false;
-
-			//references the variable signal in the comparison
-			RTLIL::SigSpec sigVar;
-
-			//references the constant signal in the comparison
-			RTLIL::SigSpec sigConst;
-
-			// note that this signal must be constant for the optimization
-			// to take place, but it is not checked beforehand.
-			// If new passes are added, this signal must be checked for const-ness
-
-			//width of the variable port
-			int width;
-			int const_width;
-
-			bool var_signed;
-
-			if (cell->type == "$lt" || cell->type == "$ge") {
-				is_lt = cell->type == "$lt" ? 1 : 0;
-				sigVar = cell->getPort("\\A");
-				sigConst = cell->getPort("\\B");
-				width = cell->parameters["\\A_WIDTH"].as_int();
-				const_width = cell->parameters["\\B_WIDTH"].as_int();
-				var_signed = cell->parameters["\\A_SIGNED"].as_bool();
-			} else
-			if (cell->type == "$gt" || cell->type == "$le") {
-				is_lt = cell->type == "$gt" ? 1 : 0;
-				sigVar = cell->getPort("\\B");
-				sigConst = cell->getPort("\\A");
-				width = cell->parameters["\\B_WIDTH"].as_int();
-				const_width = cell->parameters["\\A_WIDTH"].as_int();
-				var_signed = cell->parameters["\\B_SIGNED"].as_bool();
-			} else
-				log_abort();
+			IdString cmp_type = cell->type;
+			SigSpec var_sig = cell->getPort("\\A");
+			SigSpec const_sig = cell->getPort("\\B");
+			int var_width = cell->parameters["\\A_WIDTH"].as_int();
+			int const_width = cell->parameters["\\B_WIDTH"].as_int();
+			bool is_signed = cell->getParam("\\A_SIGNED").as_bool();
 
-			// replace a(signed) < 0 with the high bit of a
-			if (sigConst.is_fully_const() && sigConst.is_fully_zero() && var_signed == true)
+			if (!const_sig.is_fully_const())
 			{
-				RTLIL::SigSpec a_prime(RTLIL::State::S0, cell->parameters["\\Y_WIDTH"].as_int());
-				a_prime[0] = sigVar[width - 1];
-				if (is_lt) {
-					log("Replacing %s cell `%s' (implementing X<0) with X[%d]: %s\n",
-							log_id(cell->type), log_id(cell), width-1, log_signal(a_prime));
-					module->connect(cell->getPort("\\Y"), a_prime);
-					module->remove(cell);
-				} else {
-					log("Replacing %s cell `%s' (implementing X>=0) with ~X[%d]: %s\n",
-							log_id(cell->type), log_id(cell), width-1, log_signal(a_prime));
-					module->addNot(NEW_ID, a_prime, cell->getPort("\\Y"));
-					module->remove(cell);
-				}
-				did_something = true;
-				goto next_cell;
-			} else
-			if (sigConst.is_fully_const() && sigConst.is_fully_def() && var_signed == false)
+				std::swap(var_sig, const_sig);
+				std::swap(var_width, const_width);
+				if (cmp_type == "$gt")
+					cmp_type = "$lt";
+				else if (cmp_type == "$lt")
+					cmp_type = "$gt";
+				else if (cmp_type == "$ge")
+					cmp_type = "$le";
+				else if (cmp_type == "$le")
+					cmp_type = "$ge";
+			}
+
+			if (const_sig.is_fully_def() && const_sig.is_fully_const())
 			{
-				if (sigConst.is_fully_zero()) {
-					RTLIL::SigSpec a_prime(RTLIL::State::S0, GetSize(cell->getPort("\\Y")));
-					if (is_lt) {
-						log("Replacing %s cell `%s' (implementing unsigned X<0) with constant false.\n",
-								log_id(cell->type), log_id(cell));
-						a_prime[0] = RTLIL::State::S0;
-					} else {
-						log("Replacing %s cell `%s' (implementing unsigned X>=0) with constant true.\n",
-								log_id(cell->type), log_id(cell));
-						a_prime[0] = RTLIL::State::S1;
+				std::string condition, replacement;
+				SigSpec replace_sig(State::S0, GetSize(cell->getPort("\\Y")));
+				bool replace = false;
+				bool remove = false;
+
+				if (!is_signed)
+				{ /* unsigned */
+					if (const_sig.is_fully_zero() && cmp_type == "$lt") {
+						condition   = "unsigned X<0";
+						replacement = "constant 0";
+						replace_sig[0] = State::S0;
+						replace = true;
+					}
+					if (const_sig.is_fully_zero() && cmp_type == "$ge") {
+						condition   = "unsigned X>=0";
+						replacement = "constant 1";
+						replace_sig[0] = State::S1;
+						replace = true;
+					}
+					if (const_width == var_width && const_sig.is_fully_ones() && cmp_type == "$gt") {
+						condition   = "unsigned X>~0";
+						replacement = "constant 0";
+						replace_sig[0] = State::S0;
+						replace = true;
+					}
+					if (const_width == var_width && const_sig.is_fully_ones() && cmp_type == "$le") {
+						condition   = "unsigned X<=~0";
+						replacement = "constant 1";
+						replace_sig[0] = State::S1;
+						replace = true;
 					}
-					module->connect(cell->getPort("\\Y"), a_prime);
-					module->remove(cell);
-					did_something = true;
-					goto next_cell;
-				}
 
-				int const_bit_set = get_onehot_bit_index(sigConst);
-				if (const_bit_set >= 0 && const_bit_set < width) {
-					int bit_set = const_bit_set;
-					RTLIL::SigSpec a_prime(RTLIL::State::S0, width - bit_set);
-					for (int i = bit_set; i < width; i++) {
-						a_prime[i - bit_set] = sigVar[i];
+					int const_bit_hot = get_onehot_bit_index(const_sig);
+					if (const_bit_hot >= 0 && const_bit_hot < var_width)
+					{
+						RTLIL::SigSpec var_high_sig(RTLIL::State::S0, var_width - const_bit_hot);
+						for (int i = const_bit_hot; i < var_width; i++) {
+							var_high_sig[i - const_bit_hot] = var_sig[i];
+						}
+
+						if (cmp_type == "$lt")
+						{
+							condition   = stringf("unsigned X<%s", log_signal(const_sig));
+							replacement = stringf("!X[%d:%d]", var_width - 1, const_bit_hot);
+							module->addLogicNot(NEW_ID, var_high_sig, cell->getPort("\\Y"));
+							remove = true;
+						}
+						if (cmp_type == "$ge")
+						{
+							condition   = stringf("unsigned X>=%s", log_signal(const_sig));
+							replacement = stringf("|X[%d:%d]", var_width - 1, const_bit_hot);
+							module->addReduceOr(NEW_ID, var_high_sig, cell->getPort("\\Y"));
+							remove = true;
+						}
 					}
-					if (is_lt) {
-						log("Replacing %s cell `%s' (implementing unsigned X<%s) with !X[%d:%d]: %s.\n",
-								log_id(cell->type), log_id(cell), log_signal(sigConst), width - 1, bit_set, log_signal(a_prime));
-						module->addLogicNot(NEW_ID, a_prime, cell->getPort("\\Y"));
-					} else {
-						log("Replacing %s cell `%s' (implementing unsigned X>=%s) with |X[%d:%d]: %s.\n",
-								log_id(cell->type), log_id(cell), log_signal(sigConst), width - 1, bit_set, log_signal(a_prime));
-						module->addReduceOr(NEW_ID, a_prime, cell->getPort("\\Y"));
+
+					int const_bit_set = get_highest_hot_index(const_sig);
+					if(const_bit_set >= var_width)
+					{
+						string cmp_name;
+						if (cmp_type == "$lt" || cmp_type == "$le")
+						{
+							if (cmp_type == "$lt") cmp_name = "<";
+							if (cmp_type == "$le") cmp_name = "<=";
+							condition   = stringf("unsigned X[%d:0]%s%s", var_width - 1, cmp_name.c_str(), log_signal(const_sig));
+							replacement = "constant 1";
+							replace_sig[0] = State::S1;
+							replace = true;
+						}
+						if (cmp_type == "$gt" || cmp_type == "$ge")
+						{
+							if (cmp_type == "$gt") cmp_name = ">";
+							if (cmp_type == "$ge") cmp_name = ">=";
+							condition   = stringf("unsigned X[%d:0]%s%s", var_width - 1, cmp_name.c_str(), log_signal(const_sig));
+							replacement = "constant 0";
+							replace_sig[0] = State::S0;
+							replace = true;
+						}
 					}
-					module->remove(cell);
-					did_something = true;
-					goto next_cell;
 				}
-				else if(const_bit_set >= width && const_bit_set >= 0){
-					RTLIL::SigSpec a_prime(RTLIL::State::S0, 1);
-					if(is_lt){
-						a_prime[0] = RTLIL::State::S1;
-						log("Replacing %s cell `%s' (implementing unsigned X[%d:0] < %s[%d:0]) with constant 0.\n", log_id(cell->type), log_id(cell), width-1, log_signal(sigConst),const_width-1);
+				else
+				{ /* signed */
+					if (const_sig.is_fully_zero() && cmp_type == "$lt")
+					{
+						condition   = "signed X<0";
+						replacement = stringf("X[%d]", var_width - 1);
+						replace_sig[0] = var_sig[var_width - 1];
+						replace = true;
 					}
-					else{
-						log("Replacing %s cell `%s' (implementing unsigned X[%d:0]>= %s[%d:0]) with constant 1.\n", log_id(cell->type), log_id(cell), width-1, log_signal(sigConst),const_width-1);
+					if (const_sig.is_fully_zero() && cmp_type == "$ge")
+					{
+						condition   = "signed X>=0";
+						replacement = stringf("X[%d]", var_width - 1);
+						module->addNot(NEW_ID, var_sig[var_width - 1], cell->getPort("\\Y"));
+						remove = true;
 					}
-					module->connect(cell->getPort("\\Y"), a_prime);
+				}
+
+				if (replace || remove)
+				{
+					log("Replacing %s cell `%s' (implementing %s) with %s.\n",
+							log_id(cell->type), log_id(cell), condition.c_str(), replacement.c_str());
+					if (replace)
+						module->connect(cell->getPort("\\Y"), replace_sig);
 					module->remove(cell);
 					did_something = true;
 					goto next_cell;
-
 				}
 			}
 		}
@@ -1477,7 +1514,7 @@ struct OptExprPass : public Pass {
 		log("    opt_expr [options] [selection]\n");
 		log("\n");
 		log("This pass performs const folding on internal cell types with constant inputs.\n");
-		log("It also performs some simple expression rewritring.\n");
+		log("It also performs some simple expression rewriting.\n");
 		log("\n");
 		log("    -mux_undef\n");
 		log("        remove 'undef' inputs from $mux, $pmux and $_MUX_ cells\n");
diff --git a/passes/opt/opt_lut.cc b/passes/opt/opt_lut.cc
index be050c713..26855fd70 100644
--- a/passes/opt/opt_lut.cc
+++ b/passes/opt/opt_lut.cc
@@ -36,7 +36,7 @@ struct OptLutWorker
 	dict<RTLIL::Cell*, pool<RTLIL::Cell*>> luts_dlogics;
 	dict<RTLIL::Cell*, pool<int>> luts_dlogic_inputs;
 
-	int combined_count = 0;
+	int eliminated_count = 0, combined_count = 0;
 
 	bool evaluate_lut(RTLIL::Cell *lut, dict<SigBit, bool> inputs)
 	{
@@ -133,7 +133,7 @@ struct OptLutWorker
 				// Second, make sure that the connection to dedicated logic is legal. If it is not legal,
 				// it means one of the two things:
 				//   * The connection is spurious. I.e. this is dedicated logic that will be packed
-				//     with some other LUT, and it just happens to be conected to this LUT as well.
+				//     with some other LUT, and it just happens to be connected to this LUT as well.
 				//   * The connection is illegal.
 				// In either of these cases, we don't need to concern ourselves with preserving the connection
 				// between this LUT and this dedicated logic cell.
@@ -188,7 +188,7 @@ struct OptLutWorker
 		show_stats_by_arity();
 
 		log("\n");
-		log("Combining LUTs.\n");
+		log("Eliminating LUTs.\n");
 		pool<RTLIL::Cell*> worklist = luts;
 		while (worklist.size())
 		{
@@ -198,6 +198,106 @@ struct OptLutWorker
 				break;
 			}
 
+			auto lut = worklist.pop();
+			SigSpec lut_input = sigmap(lut->getPort("\\A"));
+			pool<int> &lut_dlogic_inputs = luts_dlogic_inputs[lut];
+
+			vector<SigBit> lut_inputs;
+			for (auto &bit : lut_input)
+			{
+				if (bit.wire)
+					lut_inputs.push_back(sigmap(bit));
+			}
+
+			bool const0_match = true;
+			bool const1_match = true;
+			vector<bool> input_matches;
+			for (size_t i = 0; i < lut_inputs.size(); i++)
+				input_matches.push_back(true);
+
+			for (int eval = 0; eval < 1 << lut_inputs.size(); eval++)
+			{
+				dict<SigBit, bool> eval_inputs;
+				for (size_t i = 0; i < lut_inputs.size(); i++)
+					eval_inputs[lut_inputs[i]] = (eval >> i) & 1;
+				bool value = evaluate_lut(lut, eval_inputs);
+				if (value != 0)
+					const0_match = false;
+				if (value != 1)
+					const1_match = false;
+				for (size_t i = 0; i < lut_inputs.size(); i++)
+				{
+					if (value != eval_inputs[lut_inputs[i]])
+						input_matches[i] = false;
+				}
+			}
+
+			int input_match = -1;
+			for (size_t i = 0; i < lut_inputs.size(); i++)
+				if (input_matches[i])
+					input_match = i;
+
+			if (const0_match || const1_match || input_match != -1)
+			{
+				log("Found redundant cell %s.%s.\n", log_id(module), log_id(lut));
+
+				SigBit value;
+				if (const0_match)
+				{
+					log("  Cell evaluates constant 0.\n");
+					value = State::S0;
+				}
+				if (const1_match)
+				{
+					log("  Cell evaluates constant 1.\n");
+					value = State::S1;
+				}
+				if (input_match != -1) {
+					log("  Cell evaluates signal %s.\n", log_signal(lut_inputs[input_match]));
+					value = lut_inputs[input_match];
+				}
+
+				if (lut_dlogic_inputs.size())
+				{
+					log("  Not eliminating cell (connected to dedicated logic).\n");
+				}
+				else
+				{
+					SigSpec lut_output = lut->getPort("\\Y");
+					for (auto &port : index.query_ports(lut_output))
+					{
+						if (port.cell != lut && luts.count(port.cell))
+							worklist.insert(port.cell);
+					}
+
+					module->connect(lut_output, value);
+					sigmap.add(lut_output, value);
+
+					module->remove(lut);
+					luts.erase(lut);
+					luts_arity.erase(lut);
+					luts_dlogics.erase(lut);
+					luts_dlogic_inputs.erase(lut);
+
+					eliminated_count++;
+					if (limit > 0)
+						limit--;
+				}
+			}
+		}
+		show_stats_by_arity();
+
+		log("\n");
+		log("Combining LUTs.\n");
+		worklist = luts;
+		while (worklist.size())
+		{
+			if (limit == 0)
+			{
+				log("Limit reached.\n");
+				break;
+			}
+
 			auto lutA = worklist.pop();
 			SigSpec lutA_input = sigmap(lutA->getPort("\\A"));
 			SigSpec lutA_output = sigmap(lutA->getPort("\\Y")[0]);
@@ -487,16 +587,20 @@ struct OptLutPass : public Pass {
 		}
 		extra_args(args, argidx, design);
 
-		int total_count = 0;
+		int eliminated_count = 0, combined_count = 0;
 		for (auto module : design->selected_modules())
 		{
-			OptLutWorker worker(dlogic, module, limit - total_count);
-			total_count += worker.combined_count;
+			OptLutWorker worker(dlogic, module, limit - eliminated_count - combined_count);
+			eliminated_count += worker.eliminated_count;
+			combined_count   += worker.combined_count;
 		}
-		if (total_count)
+		if (eliminated_count)
+			design->scratchpad_set_bool("opt.did_something", true);
+		if (combined_count)
 			design->scratchpad_set_bool("opt.did_something", true);
 		log("\n");
-		log("Combined %d LUTs.\n", total_count);
+		log("Eliminated %d LUTs.\n", eliminated_count);
+		log("Combined %d LUTs.\n", combined_count);
 	}
 } OptLutPass;
 
diff --git a/passes/opt/share.cc b/passes/opt/share.cc
index b80280829..c85c27427 100644
--- a/passes/opt/share.cc
+++ b/passes/opt/share.cc
@@ -710,8 +710,12 @@ struct ShareWorker
 			RTLIL::Cell *supercell = module->addCell(NEW_ID, c1);
 			RTLIL::SigSpec addr1 = c1->getPort("\\ADDR");
 			RTLIL::SigSpec addr2 = c2->getPort("\\ADDR");
-			if (addr1 != addr2)
-				supercell->setPort("\\ADDR", module->Mux(NEW_ID, addr2, addr1, act));
+			if (GetSize(addr1) < GetSize(addr2))
+				addr1.extend_u0(GetSize(addr2));
+			else
+				addr2.extend_u0(GetSize(addr1));
+			supercell->setPort("\\ADDR", addr1 != addr2 ? module->Mux(NEW_ID, addr2, addr1, act) : addr1);
+			supercell->parameters["\\ABITS"] = RTLIL::Const(GetSize(addr1));
 			supercell_aux.insert(module->addPos(NEW_ID, supercell->getPort("\\DATA"), c2->getPort("\\DATA")));
 			supercell_aux.insert(supercell);
 			return supercell;
diff --git a/passes/opt/wreduce.cc b/passes/opt/wreduce.cc
index 0164f58d6..8063b86a6 100644
--- a/passes/opt/wreduce.cc
+++ b/passes/opt/wreduce.cc
@@ -235,8 +235,11 @@ struct WreduceWorker
 		} else {
 			while (GetSize(sig) > 0)
 			{
-				auto info = mi.query(sig[GetSize(sig)-1]);
+				auto bit = sig[GetSize(sig)-1];
+				if (keep_bits.count(bit))
+					break;
 
+				auto info = mi.query(bit);
 				if (info->is_output || GetSize(info->ports) > 1)
 					break;
 
diff --git a/passes/proc/proc_clean.cc b/passes/proc/proc_clean.cc
index b9e43d1db..52141a8ec 100644
--- a/passes/proc/proc_clean.cc
+++ b/passes/proc/proc_clean.cc
@@ -77,18 +77,42 @@ void proc_clean_switch(RTLIL::SwitchRule *sw, RTLIL::CaseRule *parent, bool &did
 	}
 	else
 	{
-		bool all_cases_are_empty = true;
-		for (auto cs : sw->cases) {
-			if (cs->actions.size() != 0 || cs->switches.size() != 0)
-				all_cases_are_empty = false;
+		bool all_fully_def = true;
+		for (auto cs : sw->cases)
+		{
 			if (max_depth != 0)
 				proc_clean_case(cs, did_something, count, max_depth-1);
+			int size = 0;
+			for (auto cmp : cs->compare)
+			{
+				size += cmp.size();
+				if (!cmp.is_fully_def())
+					all_fully_def = false;
+			}
+			if (sw->signal.size() != size)
+				all_fully_def = false;
 		}
-		if (all_cases_are_empty) {
-			did_something = true;
-			for (auto cs : sw->cases)
-				delete cs;
-			sw->cases.clear();
+		if (all_fully_def)
+		{
+			for (auto cs = sw->cases.begin(); cs != sw->cases.end();)
+			{
+				if ((*cs)->empty())
+				{
+					did_something = true;
+					delete *cs;
+					cs = sw->cases.erase(cs);
+				}
+				else ++cs;
+			}
+		}
+		else
+		{
+			while (!sw->cases.empty() && sw->cases.back()->empty())
+			{
+				did_something = true;
+				delete sw->cases.back();
+				sw->cases.pop_back();
+			}
 		}
 	}
 }
@@ -106,7 +130,7 @@ void proc_clean_case(RTLIL::CaseRule *cs, bool &did_something, int &count, int m
 	}
 	for (size_t i = 0; i < cs->switches.size(); i++) {
 		RTLIL::SwitchRule *sw = cs->switches[i];
-		if (sw->cases.size() == 0) {
+		if (sw->empty()) {
 			cs->switches.erase(cs->switches.begin() + (i--));
 			did_something = true;
 			delete sw;
diff --git a/passes/techmap/Makefile.inc b/passes/techmap/Makefile.inc
index 4faa0ab00..cf9e198ad 100644
--- a/passes/techmap/Makefile.inc
+++ b/passes/techmap/Makefile.inc
@@ -36,6 +36,7 @@ OBJS += passes/techmap/attrmvcp.o
 OBJS += passes/techmap/attrmap.o
 OBJS += passes/techmap/zinit.o
 OBJS += passes/techmap/dff2dffs.o
+OBJS += passes/techmap/flowmap.o
 endif
 
 GENFILES += passes/techmap/techmap.inc
diff --git a/passes/techmap/dffinit.cc b/passes/techmap/dffinit.cc
index a8eecc970..48390488e 100644
--- a/passes/techmap/dffinit.cc
+++ b/passes/techmap/dffinit.cc
@@ -43,18 +43,37 @@ struct DffinitPass : public Pass {
 		log("        initial value of 1 or 0. (multi-bit values are not supported in this\n");
 		log("        mode.)\n");
 		log("\n");
+		log("    -strinit <string for high> <string for low> \n");
+		log("        use string values in the command line to represent a single-bit\n");
+		log("        initial value of 1 or 0. (multi-bit values are not supported in this\n");
+		log("        mode.)\n");
+		log("\n");
+		log("    -noreinit\n");
+		log("        fail if the FF cell has already a defined initial value set in other\n");
+		log("        passes and the initial value of the net it drives is not equal to\n");
+		log("        the already defined initial value.\n");
+		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
 	{
 		log_header(design, "Executing DFFINIT pass (set INIT param on FF cells).\n");
 
 		dict<IdString, dict<IdString, IdString>> ff_types;
-		bool highlow_mode = false;
+		bool highlow_mode = false, noreinit = false;
+		std::string high_string, low_string;
 
 		size_t argidx;
 		for (argidx = 1; argidx < args.size(); argidx++) {
 			if (args[argidx] == "-highlow") {
 				highlow_mode = true;
+				high_string = "high";
+				low_string = "low";
+				continue;
+			}
+			if (args[argidx] == "-strinit" && argidx+2 < args.size()) {
+				highlow_mode = true;
+				high_string = args[++argidx];
+				low_string = args[++argidx];
 				continue;
 			}
 			if (args[argidx] == "-ff" && argidx+3 < args.size()) {
@@ -64,6 +83,10 @@ struct DffinitPass : public Pass {
 				ff_types[cell_name][output_port] = init_param;
 				continue;
 			}
+			if (args[argidx] == "-noreinit") {
+				noreinit = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -112,6 +135,10 @@ struct DffinitPass : public Pass {
 							continue;
 						while (GetSize(value.bits) <= i)
 							value.bits.push_back(State::S0);
+						if (noreinit && value.bits[i] != State::Sx && value.bits[i] != init_bits.at(sig[i]))
+							log_error("Trying to assign a different init value for %s.%s.%s which technically "
+									"have a conflicted init value.\n",
+									log_id(module), log_id(cell), log_id(it.second));
 						value.bits[i] = init_bits.at(sig[i]);
 						cleanup_bits.insert(sig[i]);
 					}
@@ -121,9 +148,9 @@ struct DffinitPass : public Pass {
 							log_error("Multi-bit init value for %s.%s.%s is incompatible with -highlow mode.\n",
 									log_id(module), log_id(cell), log_id(it.second));
 						if (value[0] == State::S1)
-							value = Const("high");
+							value = Const(high_string);
 						else
-							value = Const("low");
+							value = Const(low_string);
 					}
 
 					log("Setting %s.%s.%s (port=%s, net=%s) to %s.\n", log_id(module), log_id(cell), log_id(it.second),
diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc
new file mode 100644
index 000000000..ddbd7bf5d
--- /dev/null
+++ b/passes/techmap/flowmap.cc
@@ -0,0 +1,1613 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2018  whitequark <whitequark@whitequark.org>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+// [[CITE]] FlowMap algorithm
+// Jason Cong; Yuzheng Ding, "An Optimal Technology Mapping Algorithm for Delay Optimization in Lookup-Table Based FPGA Designs,"
+// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, Vol. 13, pp. 1-12, Jan. 1994.
+// doi: 10.1109/43.273754
+
+// [[CITE]] FlowMap-r algorithm
+// Jason Cong; Yuzheng Ding, "On Area/Depth Tradeoff in LUT-Based FPGA Technology Mapping,"
+// Very Large Scale Integration Systems, IEEE Transactions on, Vol. 2, June 1994.
+// doi: 10.1109/92.285741
+
+// Required reading material:
+//
+// Min-cut max-flow theorem:
+//   https://www.coursera.org/lecture/algorithms-part2/maxflow-mincut-theorem-beb9G
+// FlowMap paper:
+//   http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf   (short version)
+//   https://limsk.ece.gatech.edu/book/papers/flowmap.pdf (long version)
+// FlowMap-r paper:
+//   http://cadlab.cs.ucla.edu/~cong/papers/dac93.pdf     (short version)
+//   https://sci-hub.tw/10.1109/92.285741                 (long version)
+
+// Notes on correspondence between paper and implementation:
+//
+// 1. In the FlowMap paper, the nodes are logic elements (analogous to Yosys cells) and edges are wires. However, in our implementation,
+// we use an inverted approach: the nodes are Yosys wire bits, and the edges are derived from (but aren't represented by) Yosys cells.
+// This may seem counterintuitive. Three observations may help understanding this. First, for a cell with a 1-bit Y output that is
+// the sole driver of its output net (which is the typical case), these representations are equivalent, because there is an exact
+// correspondence between cells and output wires. Second, in the paper, primary inputs (analogous to Yosys cell or module ports) are
+// nodes, and in Yosys, inputs are wires; our approach allows a direct mapping from both primary inputs and 1-output logic elements to
+// flow graph nodes. Third, Yosys cells may have multiple outputs or multi-bit outputs, and by using Yosys wire bits as flow graph nodes,
+// such cells are supported without any additional effort; any Yosys cell with n output wire bits ends up being split into n flow graph
+// nodes.
+//
+// 2. The FlowMap paper introduces three networks: Nt, Nt', and Nt''. The network Nt is directly represented by a subgraph of RTLIL graph,
+// which is parsed into an equivalent but easier to traverse representation in FlowmapWorker. The network Nt' is built explicitly
+// from a subgraph of Nt, and uses a similar representation in FlowGraph. The network Nt'' is implicit in FlowGraph, which is possible
+// because of the following observation: each Nt' node corresponds to an Nt'' edge of capacity 1, and each Nt' edge corresponds to
+// an Nt'' edge of capacity ∞. Therefore, we only need to explicitly record flow for Nt' edges and through Nt' nodes.
+//
+// 3. The FlowMap paper ambiguously states: "Moreover, we can find such a cut (X′′, X̅′′) by performing a depth first search starting at
+// the source s, and including in X′′ all the nodes which are reachable from s." This actually refers to a specific kind of search,
+// min-cut computation. Min-cut computation involves computing the set of nodes reachable from s by an undirected path with no full
+// (i.e. zero capacity) forward edges or empty (i.e. no flow) backward edges. In addition, the depth first search is required to compute
+// a max-volume max-flow min-cut specifically, because a max-flow min-cut is not, in general, unique.
+
+// Notes on implementation:
+//
+// 1. To compute depth optimal packing, an intermediate representation is used, where each cell with n output bits is split into n graph
+// nodes. Each such graph node is represented directly with the wire bit (RTLIL::SigBit instance) that corresponds to the output bit
+// it is created from. Fan-in and fan-out are represented explicitly by edge lists derived from the RTLIL graph. This IR never changes
+// after it has been computed.
+//
+// In terms of data, this IR is comprised of `inputs`, `outputs`, `nodes`, `edges_fw` and `edges_bw` fields.
+//
+// We call this IR "gate IR".
+//
+// 2. To compute area optimal packing, another intermediate representation is used, which consists of some K-feasible cone for every node
+// that exists in the gate IR. Immediately after depth optimal packing with FlowMap, each such cone occupies the lowest possible depth,
+// but this is not true in general, and transformations of this IR may change the cones, although each transformation has to keep each
+// cone K-feasible. In this IR, LUT fan-in and fan-out are represented explicitly by edge lists; if a K-feasible cone chosen for node A
+// includes nodes B and C, there are edges between all predecessors of A, B and C in the gate IR and node A in this IR. Moreover, in
+// this IR, cones may be *realized* or *derealized*. Only realized cones will end up mapped to actual LUTs in the output of this pass.
+//
+// Intuitively, this IR contains (some, ideally but not necessarily optimal) LUT representation for each input cell. By starting at outputs
+// and traversing the graph of this IR backwards, each K-feasible cone is converted to an actual LUT at the end of the pass. This is
+// the same as iterating through each realized LUT.
+//
+// The following are the invariants of this IR:
+//   a) Each gate IR node corresponds to a K-feasible cut.
+//   b) Each realized LUT is reachable through backward edges from some output.
+//   c) The LUT fan-in is exactly the fan-in of its constituent gates minus the fan-out of its constituent gates.
+// The invariants are kept even for derealized LUTs, since the whole point of this IR is ease of packing, unpacking, and repacking LUTs.
+//
+// In terms of data, this IR is comprised of `lut_nodes` (the set of all realized LUTs), `lut_gates` (the map from a LUT to its
+// constituent gates), `lut_edges_fw` and `lut_edges_bw` fields. The `inputs` and `outputs` fields are shared with the gate IR.
+//
+// We call this IR "LUT IR".
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+#include "kernel/modtools.h"
+#include "kernel/consteval.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct GraphStyle // Presentation attributes that dump_dot_graph() emits for one node or one edge.
+{
+	string label; // text written into the DOT `label` attribute
+	string color, fillcolor; // written into the DOT `color` / `fillcolor` attributes; a non-empty fillcolor makes a node `filled`
+	// Every argument is optional: the defaults are an empty label, a "black" color and an empty fillcolor.
+	GraphStyle(string label = "", string color = "black", string fillcolor = "") :
+		label(label), color(color), fillcolor(fillcolor) {}
+};
+
+static string dot_escape(string value) // Returns a copy of `value` escaped for use inside a double-quoted DOT attribute.
+{
+	std::string escaped;
+	for (char c : value) {
+		if (c == '\n')
+		{
+			escaped += "\\n"; // a raw newline is replaced by the two characters backslash + 'n'
+			continue;
+		}
+		if (c == '\\' || c == '"')
+			escaped += "\\"; // backslashes and double quotes are kept, but get a backslash in front of them
+		escaped += c;
+	}
+	return escaped;
+}
+
+static void dump_dot_graph(string filename, // Writes the given graph as a Graphviz digraph to the file `filename`.
+                           pool<RTLIL::SigBit> nodes, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges, // nodes to draw; `edges` maps a source to its sinks
+                           pool<RTLIL::SigBit> inputs, pool<RTLIL::SigBit> outputs, // drawn as boxes / octagons and pinned to the source / sink rank
+                           std::function<GraphStyle(RTLIL::SigBit)> node_style = // callback giving label and colors of a node
+                           		[](RTLIL::SigBit) { return GraphStyle{}; },
+                           std::function<GraphStyle(RTLIL::SigBit, RTLIL::SigBit)> edge_style = // callback giving label and colors of an edge
+                           		[](RTLIL::SigBit, RTLIL::SigBit) { return GraphStyle{}; },
+                           string name = "") // name of the emitted digraph
+{
+	FILE *f = fopen(filename.c_str(), "w");
+	if (f == nullptr) log_error("Can't open file `%s' for writing.\n", filename.c_str()); // log_error() does not return, so f is valid below
+	fprintf(f, "digraph \"%s\" {\n", name.c_str());
+	fprintf(f, "  rankdir=\"TB\";\n");
+	dict<RTLIL::SigBit, int> ids;
+	for (auto node : nodes)
+	{
+		int id = ids.size(); // read the size before inserting, so numbering starts at 0 whatever the operand evaluation order is
+		ids[node] = id;
+		string shape = "ellipse";
+		if (inputs[node])
+			shape = "box";
+		if (outputs[node])
+			shape = "octagon"; // a node that is both an input and an output is drawn as an output
+		auto prop = node_style(node);
+		string style = "";
+		if (!prop.fillcolor.empty())
+			style = "filled";
+		fprintf(f, "  n%d [ shape=%s, fontname=\"Monospace\", label=\"%s\", color=\"%s\", fillcolor=\"%s\", style=\"%s\" ];\n",
+		        ids[node], shape.c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str(), style.c_str());
+	}
+	// Put all drawn inputs on one rank; inputs that are not in `nodes` have no id and are left out.
+	fprintf(f, "  { rank=\"source\"; ");
+	for (auto input : inputs)
+		if (nodes[input])
+			fprintf(f, "n%d; ", ids[input]);
+	fprintf(f, "}\n");
+	// Likewise, put all drawn outputs on one rank.
+	fprintf(f, "  { rank=\"sink\"; ");
+	for (auto output : outputs)
+		if (nodes[output])
+			fprintf(f, "n%d; ", ids[output]);
+	fprintf(f, "}\n");
+	// Emit only those edges whose both endpoints are drawn.
+	for (auto edge : edges)
+	{
+		auto source = edge.first;
+		for (auto sink : edge.second) {
+			if (nodes[source] && nodes[sink])
+			{
+				auto prop = edge_style(source, sink);
+				fprintf(f, "  n%d -> n%d [ label=\"%s\", color=\"%s\", fillcolor=\"%s\" ];\n",
+				        ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str());
+			}
+		}
+	}
+
+	fprintf(f, "}\n");
+	fclose(f);
+}
+
+struct FlowGraph // Flow network with per-node capacity MAX_NODE_FLOW, plus augmenting-path search and cut extraction on it.
+{
+	const RTLIL::SigBit source; // never assigned: a default-constructed SigBit stands in for the artificial source node
+	RTLIL::SigBit sink; // set by whoever builds the graph
+	pool<RTLIL::SigBit> nodes = {source}; // the source is always part of the node set
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw; // successor and predecessor lists
+	// Flow currently routed through each node and along each forward edge (keyed by {from, to}).
+	const int MAX_NODE_FLOW = 1;
+	dict<RTLIL::SigBit, int> node_flow;
+	dict<pair<RTLIL::SigBit, RTLIL::SigBit>, int> edge_flow;
+	// Extra nodes attached to a node; in this struct they only extend labels and, for the sink, the second half of the cut.
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed;
+	// Writes the network, annotated with the current flow values, to the DOT file `filename`.
+	void dump_dot_graph(string filename)
+	{
+		auto node_style = [&](RTLIL::SigBit node) {
+			string label = (node == source) ? "(source)" : log_signal(node);
+			for (auto collapsed_node : collapsed[node])
+				label += stringf(" %s", log_signal(collapsed_node));
+			int flow = node_flow[node];
+			if (node != source && node != sink)
+				label += stringf("\n%d/%d", flow, MAX_NODE_FLOW);
+			else
+				label += stringf("\n%d/∞", flow); // source and sink are labelled as having unbounded capacity
+			return GraphStyle{label, flow < MAX_NODE_FLOW ? "green" : "black"}; // green: flow is still below MAX_NODE_FLOW
+		};
+		auto edge_style = [&](RTLIL::SigBit source, RTLIL::SigBit sink) {
+			int flow = edge_flow[{source, sink}];
+			return GraphStyle{stringf("%d/∞", flow), flow > 0 ? "blue" : "black"}; // blue: the edge carries flow
+		};
+		::dump_dot_graph(filename, nodes, edges_fw, {source}, {sink}, node_style, edge_style);
+	}
+
+	// Here, we are working on the Nt'' network, but our representation is the Nt' network.
+	// The difference between these is that where in Nt' we have a subgraph:
+	//
+	//   v1 -> v2 -> v3
+	//
+	// in Nt'' we have a corresponding subgraph:
+	//
+	//   v'1b -∞-> v'2t -f-> v'2b -∞-> v'3t
+	//
+	// To address this, we split each node v into two nodes, v't and v'b. This representation is virtual,
+	// in the sense that nodes v't and v'b are overlaid on top of the original node v, and only exist
+	// in paths and worklists.
+
+	struct NodePrime // One half (v't or v'b) of a split node.
+	{
+		RTLIL::SigBit node;
+		bool is_bottom; // false: top half v't, true: bottom half v'b
+
+		NodePrime(RTLIL::SigBit node, bool is_bottom) :
+			node(node), is_bottom(is_bottom) {}
+
+		bool operator==(const NodePrime &other) const
+		{
+			return node == other.node && is_bottom == other.is_bottom;
+		}
+		bool operator!=(const NodePrime &other) const
+		{
+			return !(*this == other);
+		}
+		unsigned int hash() const // combines both fields; NodePrime is stored in pool<> below
+		{
+			return hash_ops<pair<RTLIL::SigBit, int>>::hash({node, is_bottom});
+		}
+
+		static NodePrime top(RTLIL::SigBit node)
+		{
+			return NodePrime(node, /*is_bottom=*/false);
+		}
+
+		static NodePrime bottom(RTLIL::SigBit node)
+		{
+			return NodePrime(node, /*is_bottom=*/true);
+		}
+
+		NodePrime as_top() const // other half of the same node; asserts that this is a bottom half
+		{
+			log_assert(is_bottom);
+			return top(node);
+		}
+
+		NodePrime as_bottom() const // other half of the same node; asserts that this is a top half
+		{
+			log_assert(!is_bottom);
+			return bottom(node);
+		}
+	};
+	// Depth-first search for one augmenting path; if `commit` is set and a path exists, its flow is applied. Returns whether a path exists.
+	bool find_augmenting_path(bool commit)
+	{
+		NodePrime source_prime = {source, true}; // the search starts at the bottom half of the source...
+		NodePrime sink_prime = {sink, false}; // ...and succeeds on reaching the top half of the sink
+		vector<NodePrime> path = {source_prime};
+		pool<NodePrime> visited = {};
+		bool found;
+		do {
+			found = false;
+
+			auto node_prime = path.back();
+			visited.insert(node_prime);
+
+			if (!node_prime.is_bottom) // vt
+			{
+				if (!visited[node_prime.as_bottom()] && node_flow[node_prime.node] < MAX_NODE_FLOW) // node not full: go through it
+				{
+					path.push_back(node_prime.as_bottom());
+					found = true;
+				}
+				else
+				{
+					for (auto node_pred : edges_bw[node_prime.node]) // otherwise walk an incoming edge backwards, if it carries flow
+					{
+						if (!visited[NodePrime::bottom(node_pred)] && edge_flow[{node_pred, node_prime.node}] > 0)
+						{
+							path.push_back(NodePrime::bottom(node_pred));
+							found = true;
+							break;
+						}
+					}
+				}
+			}
+			else // vb
+			{
+				if (!visited[node_prime.as_top()] && node_flow[node_prime.node] > 0) // node carries flow: walk it backwards
+				{
+					path.push_back(node_prime.as_top());
+					found = true;
+				}
+				else
+				{
+					for (auto node_succ : edges_fw[node_prime.node]) // edges are never full, so any unvisited successor will do
+					{
+						if (!visited[NodePrime::top(node_succ)] /* && edge_flow[...] < ∞ */)
+						{
+							path.push_back(NodePrime::top(node_succ));
+							found = true;
+							break;
+						}
+					}
+				}
+			}
+
+			if (!found && path.size() > 1) // dead end: backtrack one step; the popped element stays in `visited`
+			{
+				path.pop_back();
+				found = true;
+			}
+		} while(path.back() != sink_prime && found);
+
+		if (commit && path.back() == sink_prime)
+		{
+			auto prev_prime = path.front();
+			for (auto node_prime : path)
+			{
+				if (node_prime == source_prime)
+					continue;
+
+				log_assert(prev_prime.is_bottom ^ node_prime.is_bottom); // consecutive path elements alternate between halves
+				if (prev_prime.node == node_prime.node) // step between the two halves of one node: adjust node flow
+				{
+					auto node = node_prime.node;
+					if (!prev_prime.is_bottom && node_prime.is_bottom)
+					{
+						log_assert(node_flow[node] == 0);
+						node_flow[node]++;
+					}
+					else
+					{
+						log_assert(node_flow[node] != 0);
+						node_flow[node]--;
+					}
+				}
+				else // step between two different nodes: adjust edge flow
+				{
+					if (prev_prime.is_bottom && !node_prime.is_bottom)
+					{
+						log_assert(true /* edge_flow[...] < ∞ */);
+						edge_flow[{prev_prime.node, node_prime.node}]++;
+					}
+					else
+					{
+						log_assert((edge_flow[{node_prime.node, prev_prime.node}] > 0));
+						edge_flow[{node_prime.node, prev_prime.node}]--;
+					}
+				}
+				prev_prime = node_prime;
+			}
+			// Account for the new unit of flow at both endpoints of the path.
+			node_flow[source]++;
+			node_flow[sink]++;
+		}
+		return path.back() == sink_prime;
+	}
+	// Commits up to `order` augmenting paths, then probes for one more without committing; the result is at most order + 1.
+	int maximum_flow(int order)
+	{
+		int flow = 0;
+		while (flow < order && find_augmenting_path(/*commit=*/true))
+			flow++;
+		return flow + find_augmenting_path(/*commit=*/false);
+	}
+	// Returns {x, xi}: x holds the nodes whose top half is reached by the search below, xi holds every other node.
+	pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> edge_cut()
+	{
+		pool<RTLIL::SigBit> x, xi;
+
+		NodePrime source_prime = {source, true};
+		NodePrime sink_prime = {sink, false}; // NOTE(review): not referenced again in this function
+		pool<NodePrime> visited;
+		vector<NodePrime> worklist = {source_prime};
+		while (!worklist.empty())
+		{
+			auto node_prime = worklist.back();
+			worklist.pop_back();
+			if (visited[node_prime])
+				continue;
+			visited.insert(node_prime);
+
+			if (!node_prime.is_bottom)
+				x.insert(node_prime.node);
+
+			// Mincut is constructed by traversing a graph in an undirected way along forward edges that aren't full, or backward edges
+			// that aren't empty.
+			if (!node_prime.is_bottom) // top
+			{
+				if (node_flow[node_prime.node] < MAX_NODE_FLOW)
+					worklist.push_back(node_prime.as_bottom());
+				for (auto node_pred : edges_bw[node_prime.node])
+					if (edge_flow[{node_pred, node_prime.node}] > 0)
+						worklist.push_back(NodePrime::bottom(node_pred));
+			}
+			else // bottom
+			{
+				if (node_flow[node_prime.node] > 0)
+					worklist.push_back(node_prime.as_top());
+				for (auto node_succ : edges_fw[node_prime.node])
+					if (true /* edge_flow[...] < ∞ */)
+						worklist.push_back(NodePrime::top(node_succ));
+			}
+		}
+		// Everything that did not end up in x goes to xi.
+		for (auto node : nodes)
+			if (!x[node])
+				xi.insert(node);
+		// Nodes recorded as collapsed into the sink are not in `nodes` themselves unless added separately, so add them to xi here.
+		for (auto collapsed_node : collapsed[sink])
+			xi.insert(collapsed_node);
+
+		log_assert(!x[sink] && xi[sink]);
+		return {x, xi};
+	}
+};
+
+struct FlowmapWorker
+{
+	int order;
+	int r_alpha, r_beta, r_gamma;
+	bool debug, debug_relax;
+
+	RTLIL::Module *module;
+	SigMap sigmap;
+	ModIndex index;
+
+	dict<RTLIL::SigBit, ModIndex::PortInfo> node_origins;
+
+	// Gate IR
+	pool<RTLIL::SigBit> nodes, inputs, outputs;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw;
+	dict<RTLIL::SigBit, int> labels;
+
+	// LUT IR
+	pool<RTLIL::SigBit> lut_nodes;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_gates;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_edges_fw, lut_edges_bw;
+	dict<RTLIL::SigBit, int> lut_depths, lut_altitudes, lut_slacks;
+
+	int gate_count = 0, lut_count = 0, packed_count = 0;
+	int gate_area = 0, lut_area = 0;
+
+	enum class GraphMode {
+		Label,
+		Cut,
+		Slack,
+	};
+
+	void dump_dot_graph(string filename, GraphMode mode,
+	                    pool<RTLIL::SigBit> subgraph_nodes = {}, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> subgraph_edges = {},
+	                    dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed = {},
+	                    pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut = {})
+	{
+		if (subgraph_nodes.empty())
+			subgraph_nodes = nodes;
+		if (subgraph_edges.empty())
+			subgraph_edges = edges_fw;
+
+		auto node_style = [&](RTLIL::SigBit node) {
+			string label = log_signal(node);
+			for (auto collapsed_node : collapsed[node])
+				if (collapsed_node != node)
+					label += stringf(" %s", log_signal(collapsed_node));
+			switch (mode)
+			{
+				case GraphMode::Label:
+					if (labels[node] == -1)
+					{
+						label += "\nl=?";
+						return GraphStyle{label};
+					}
+					else
+					{
+						label += stringf("\nl=%d", labels[node]);
+						string fillcolor = stringf("/set311/%d", 1 + labels[node] % 11);
+						return GraphStyle{label, "", fillcolor};
+					}
+
+				case GraphMode::Cut:
+					if (cut.first[node])
+						return GraphStyle{label, "blue"};
+					if (cut.second[node])
+						return GraphStyle{label, "red"};
+					return GraphStyle{label};
+
+				case GraphMode::Slack:
+					label += stringf("\nd=%d a=%d\ns=%d", lut_depths[node], lut_altitudes[node], lut_slacks[node]);
+					return GraphStyle{label, lut_slacks[node] == 0 ? "red" : "black"};
+			}
+			return GraphStyle{label};
+		};
+		auto edge_style = [&](RTLIL::SigBit, RTLIL::SigBit) {
+			return GraphStyle{};
+		};
+		::dump_dot_graph(filename, subgraph_nodes, subgraph_edges, inputs, outputs, node_style, edge_style, module->name.str());
+	}
+
+	void dump_dot_lut_graph(string filename, GraphMode mode)
+	{
+		pool<RTLIL::SigBit> lut_and_input_nodes;
+		lut_and_input_nodes.insert(lut_nodes.begin(), lut_nodes.end());
+		lut_and_input_nodes.insert(inputs.begin(), inputs.end());
+		dump_dot_graph(filename, mode, lut_and_input_nodes, lut_edges_fw, lut_gates);
+	}
+
+	pool<RTLIL::SigBit> find_subgraph(RTLIL::SigBit sink)
+	{
+		pool<RTLIL::SigBit> subgraph;
+		pool<RTLIL::SigBit> worklist = {sink};
+		while (!worklist.empty())
+		{
+			auto node = worklist.pop();
+			subgraph.insert(node);
+			for (auto source : edges_bw[node])
+			{
+				if (!subgraph[source])
+					worklist.insert(source);
+			}
+		}
+		return subgraph;
+	}
+
+	FlowGraph build_flow_graph(RTLIL::SigBit sink, int p) // Builds the flow network for `sink` by walking edges_bw; every node labeled `p` is merged into the sink.
+	{
+		FlowGraph graph;
+		graph.sink = sink;
+		auto merged = [&](RTLIL::SigBit bit) -> RTLIL::SigBit { return labels[bit] == p ? sink : bit; }; // representative of a node after collapsing
+		pool<RTLIL::SigBit> pending = {sink}, seen;
+		while (pending.size() > 0)
+		{
+			auto cur = pending.pop();
+			seen.insert(cur);
+
+			auto cur_rep = merged(cur);
+			if (cur_rep != cur)
+				graph.collapsed[cur_rep].insert(cur); // remember which original nodes a collapsed node stands for
+			graph.nodes.insert(cur_rep);
+
+			for (auto pred : edges_bw[cur])
+			{
+				auto pred_rep = merged(pred);
+				if (pred_rep != pred)
+					graph.collapsed[pred_rep].insert(pred);
+				if (pred_rep != cur_rep) // edges internal to the collapsed sink are dropped
+				{
+					graph.edges_bw[cur_rep].insert(pred_rep);
+					graph.edges_fw[pred_rep].insert(cur_rep);
+				}
+				if (inputs.count(pred)) // primary inputs hang off the flow graph's source node
+				{
+					graph.edges_bw[pred_rep].insert(graph.source);
+					graph.edges_fw[graph.source].insert(pred_rep);
+				}
+
+				if (!seen.count(pred))
+					pending.insert(pred);
+			}
+		}
+		return graph;
+	}
+
+	void discover_nodes(pool<IdString> cell_types) // Builds the gate graph (nodes, edges_fw/bw, inputs, outputs) from selected cells whose type is in cell_types.
+	{
+		for (auto cell : module->selected_cells())
+		{
+			if (!cell_types[cell->type])
+				continue;
+
+			if (!cell->known())
+				log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell));
+
+			pool<RTLIL::SigBit> fanout; // sigmap-canonical output bits of this cell
+			for (auto conn : cell->connections())
+			{
+				if (!cell->output(conn.first)) continue;
+				int offset = -1; // bit index within the port; advanced before the constant check so skipped bits still count
+				for (auto bit : conn.second)
+				{
+					offset++;
+					if (!bit.wire) continue;
+					auto mapped_bit = sigmap(bit);
+					if (nodes[mapped_bit]) // a bit may be produced by at most one selected gate
+						log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit));
+					nodes.insert(mapped_bit);
+					node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset); // remember which cell/port/bit drives the node
+					fanout.insert(mapped_bit);
+				}
+			}
+
+			int fanin = 0; // number of non-constant input bits (not deduplicated)
+			for (auto conn : cell->connections())
+			{
+				if (!cell->input(conn.first)) continue;
+				for (auto bit : sigmap(conn.second))
+				{
+					if (!bit.wire) continue;
+					for (auto fanout_bit : fanout) // every input bit is connected to every output bit of the cell
+					{
+						edges_fw[bit].insert(fanout_bit);
+						edges_bw[fanout_bit].insert(bit);
+					}
+					fanin++;
+				}
+			}
+
+			if (fanin > order)
+				log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n",
+				          cell->type.c_str(), log_id(module), log_id(cell), fanin, order);
+
+			gate_count++;
+			gate_area += 1 << fanin; // area of a gate is counted as 2^fanin
+		}
+
+		for (auto edge : edges_fw)
+		{
+			if (!nodes[edge.first]) // a bit that feeds a gate but is not produced by a selected gate is a primary input
+			{
+				inputs.insert(edge.first);
+				nodes.insert(edge.first);
+			}
+		}
+
+		for (auto node : nodes)
+		{
+			auto node_info = index.query(node); // NOTE(review): assumed non-null for every node — confirm against ModIndex
+			if (node_info->is_output && !inputs[node])
+				outputs.insert(node);
+			for (auto port : node_info->ports)
+				if (!cell_types[port.cell->type] && !inputs[node]) // connected to a cell outside the mapped set
+					outputs.insert(node);
+		}
+
+		if (debug)
+		{
+			dump_dot_graph("flowmap-initial.dot", GraphMode::Label);
+			log("Dumped initial graph to `flowmap-initial.dot`.\n");
+		}
+	}
+
+	void label_nodes() // Assigns a label to every node and records, per node, the LUT gates (lut_gates) and LUT inputs (lut_edges_bw/fw).
+	{
+		for (auto node : nodes)
+			labels[node] = -1; // -1 marks a node that has not been labeled yet
+		for (auto input : inputs)
+		{
+			if (input.wire->attributes.count("\\$flowmap_level")) // inputs may carry an explicit starting level
+				labels[input] = input.wire->attributes["\\$flowmap_level"].as_int();
+			else
+				labels[input] = 0;
+		}
+
+		pool<RTLIL::SigBit> worklist = nodes;
+		int debug_num = 0; // running number used only for debug output file names
+		while (!worklist.empty())
+		{
+			auto sink = worklist.pop();
+			if (labels[sink] != -1)
+				continue;
+
+			bool inputs_have_labels = true;
+			for (auto sink_input : edges_bw[sink])
+			{
+				if (labels[sink_input] == -1)
+				{
+					inputs_have_labels = false;
+					break;
+				}
+			}
+			if (!inputs_have_labels) // dropped for now; re-queued below once a predecessor gets its label
+				continue;
+
+			if (debug)
+			{
+				debug_num++;
+				log("Examining subgraph %d rooted in %s.\n", debug_num, log_signal(sink));
+			}
+
+			pool<RTLIL::SigBit> subgraph = find_subgraph(sink);
+
+			int p = 1; // largest label in the sink's input cone, but at least 1
+			for (auto subgraph_node : subgraph)
+				p = max(p, labels[subgraph_node]);
+
+			FlowGraph flow_graph = build_flow_graph(sink, p);
+			int flow = flow_graph.maximum_flow(order);
+			pool<RTLIL::SigBit> x, xi; // x: nodes outside the LUT, xi: nodes packed into the LUT rooted at sink
+			if (flow <= order)
+			{
+				labels[sink] = p; // a cut with at most `order` inputs exists, so the sink keeps label p
+				auto cut = flow_graph.edge_cut();
+				x = cut.first;
+				xi = cut.second;
+			}
+			else
+			{
+				labels[sink] = p + 1; // no such cut: the LUT contains only the sink itself
+				x = subgraph;
+				x.erase(sink);
+				xi.insert(sink);
+			}
+			lut_gates[sink] = xi;
+
+			pool<RTLIL::SigBit> k; // LUT inputs: nodes in x that directly feed a node in xi
+			for (auto xi_node : xi)
+			{
+				for (auto xi_node_pred : edges_bw[xi_node])
+					if (x[xi_node_pred])
+						k.insert(xi_node_pred);
+			}
+			log_assert((int)k.size() <= order);
+			lut_edges_bw[sink] = k;
+			for (auto k_node : k)
+				lut_edges_fw[k_node].insert(sink);
+
+			if (debug)
+			{
+				log("  Maximum flow: %d. Assigned label %d.\n", flow, labels[sink]);
+				dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), GraphMode::Cut, subgraph, {}, {}, {x, xi});
+				log("  Dumped subgraph to `flowmap-%d-sub.dot`.\n", debug_num);
+				flow_graph.dump_dot_graph(stringf("flowmap-%d-flow.dot", debug_num));
+				log("  Dumped flow graph to `flowmap-%d-flow.dot`.\n", debug_num);
+				log("    LUT inputs:");
+				for (auto k_node : k)
+					log(" %s", log_signal(k_node));
+				log(".\n");
+				log("    LUT packed gates:");
+				for (auto xi_node : xi)
+					log(" %s", log_signal(xi_node));
+				log(".\n");
+			}
+
+			for (auto sink_succ : edges_fw[sink]) // successors may now have all their predecessors labeled
+				worklist.insert(sink_succ);
+		}
+
+		if (debug)
+		{
+			dump_dot_graph("flowmap-labeled.dot", GraphMode::Label);
+			log("Dumped labeled graph to `flowmap-labeled.dot`.\n");
+		}
+	}
+
+	int map_luts() // Realizes the LUTs reachable backwards from the outputs and returns the largest label found in `labels`.
+	{
+		pool<RTLIL::SigBit> pending = outputs;
+		while (pending.size() > 0)
+		{
+			auto realized = pending.pop();
+			lut_nodes.insert(realized);
+			for (auto fanin : lut_edges_bw[realized])
+				if (!(lut_nodes.count(fanin) || inputs.count(fanin))) // inputs and already realized LUTs need no further work
+					pending.insert(fanin);
+		}
+
+		int max_label = 0;
+		for (auto &entry : labels)
+			if (entry.second > max_label) max_label = entry.second;
+		log("Mapped to %zu LUTs with maximum depth %d.\n", lut_nodes.size(), max_label);
+
+		if (debug)
+		{
+			dump_dot_lut_graph("flowmap-mapped.dot", GraphMode::Label);
+			log("Dumped mapped graph to `flowmap-mapped.dot`.\n");
+		}
+
+		return max_label;
+	}
+
+	void realize_derealize_lut(RTLIL::SigBit lut, pool<RTLIL::SigBit> *changed = nullptr) // Re-evaluates whether `lut` (and, transitively, its LUT predecessors) must be in lut_nodes; toggled nodes are added to *changed if given.
+	{
+		pool<RTLIL::SigBit> worklist = {lut};
+		while (!worklist.empty())
+		{
+			auto cur = worklist.pop(); // distinct name: the original local shadowed the `lut` parameter
+			if (inputs[cur]) // primary inputs are never realized as LUTs
+				continue;
+
+			bool realized_successors = false; // true if at least one LUT consuming `cur` is itself realized
+			for (auto lut_succ : lut_edges_fw[cur])
+				if (lut_nodes[lut_succ])
+					realized_successors = true;
+
+			if (realized_successors && !lut_nodes[cur])
+				lut_nodes.insert(cur);
+			else if (!realized_successors && lut_nodes[cur] && !outputs[cur]) // outputs must stay realized even without LUT successors
+				lut_nodes.erase(cur);
+			else
+				continue; // nothing toggled, so predecessors are unaffected
+
+			for (auto lut_pred : lut_edges_bw[cur]) // a toggle may change whether the predecessors are still needed
+				worklist.insert(lut_pred);
+
+			if (changed)
+				changed->insert(cur);
+		}
+	}
+
+	void add_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr) // Adds LUT edge pred -> succ, re-evaluates realization of pred, and reports both ends in *changed if given.
+	{
+		log_assert(!lut_edges_fw[pred][succ] && !lut_edges_bw[succ][pred]);
+		log_assert((int)lut_edges_bw[succ].size() < order);
+
+		lut_edges_fw[pred].insert(succ);
+		lut_edges_bw[succ].insert(pred);
+		realize_derealize_lut(pred, changed);
+
+		if (changed == nullptr)
+			return;
+
+		changed->insert(pred);
+		changed->insert(succ);
+	}
+
+	void remove_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr) // Removes LUT edge pred -> succ and re-evaluates realization of pred; reports affected nodes in *changed if given.
+	{
+		log_assert(lut_edges_fw[pred][succ] && lut_edges_bw[succ][pred]);
+
+		lut_edges_fw[pred].erase(succ);
+		lut_edges_bw[succ].erase(pred);
+		realize_derealize_lut(pred, changed);
+
+		if (changed == nullptr)
+			return;
+		const bool pred_still_realized = lut_nodes.count(pred) > 0; // pred is only reported while it is still in lut_nodes
+		if (pred_still_realized)
+			changed->insert(pred);
+		changed->insert(succ);
+	}
+
+	pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut_lut_at_gate(RTLIL::SigBit lut, RTLIL::SigBit lut_gate) // Splits the inputs of `lut` into {inputs reached only through lut_gate, inputs still reached when lut_gate is cut off}.
+	{
+		pool<RTLIL::SigBit> only_via_gate = lut_edges_bw[lut]; // start with all LUT inputs, then strike those still reachable
+		pool<RTLIL::SigBit> still_needed;
+		pool<RTLIL::SigBit> pending = {lut};
+		while (pending.size() > 0)
+		{
+			auto cur = pending.pop();
+			for (auto pred : edges_bw[cur])
+			{
+				if (pred == lut_gate) // never walk through the breaking gate
+					continue;
+				if (!lut_gates[lut].count(pred)) // left the LUT: pred is an input that remains in use
+				{
+					only_via_gate.erase(pred);
+					still_needed.insert(pred);
+				}
+				else
+					pending.insert(pred);
+			}
+		}
+		return {only_via_gate, still_needed};
+	}
+
+	void compute_lut_distances(dict<RTLIL::SigBit, int> &lut_distances, bool forward, // Computes longest-path distances over the LUT graph: from inputs if forward, from outputs otherwise.
+	                          pool<RTLIL::SigBit> initial = {}, pool<RTLIL::SigBit> *changed = nullptr) // `initial` seeds an incremental update; nodes whose distance changed are added to *changed.
+	{
+		pool<RTLIL::SigBit> terminals = forward ? inputs : outputs;
+		auto &lut_edges_next = forward ? lut_edges_fw : lut_edges_bw; // direction in which distances propagate
+		auto &lut_edges_prev = forward ? lut_edges_bw : lut_edges_fw; // direction from which a node's distance is derived
+
+		if (initial.empty()) // no seed given: recompute starting from the terminals
+			initial = terminals;
+		for (auto node : initial)
+			lut_distances.erase(node); // forces the seed nodes to be treated as changed below
+
+		pool<RTLIL::SigBit> worklist = initial;
+		while (!worklist.empty())
+		{
+			auto lut = worklist.pop();
+			int lut_distance = 0;
+			if (forward && inputs[lut])
+				lut_distance = labels[lut]; // to support (* $flowmap_level=n *)
+			for (auto lut_prev : lut_edges_prev[lut])
+				if ((lut_nodes[lut_prev] || inputs[lut_prev]) && lut_distances.count(lut_prev)) // only realized LUTs/inputs with a known distance contribute
+					lut_distance = max(lut_distance, lut_distances[lut_prev] + 1);
+			if (!lut_distances.count(lut) || lut_distances[lut] != lut_distance)
+			{
+				lut_distances[lut] = lut_distance;
+				if (changed != nullptr && !inputs[lut]) // inputs are never reported as changed
+					changed->insert(lut);
+				for (auto lut_next : lut_edges_next[lut]) // propagate only when the value actually changed
+					if (lut_nodes[lut_next] || inputs[lut_next])
+						worklist.insert(lut_next);
+			}
+		}
+	}
+
+	void check_lut_distances(const dict<RTLIL::SigBit, int> &lut_distances, bool forward) // Self-check: asserts that lut_distances agrees with a from-scratch recomputation for every realized LUT.
+	{
+		dict<RTLIL::SigBit, int> gold_lut_distances; // reference values computed without an incremental seed
+		compute_lut_distances(gold_lut_distances, forward);
+		for (auto lut_distance : lut_distances)
+			if (lut_nodes[lut_distance.first]) // entries for nodes not in lut_nodes are not compared
+				log_assert(lut_distance.second == gold_lut_distances[lut_distance.first]);
+	}
+
+	// Depth of a LUT: length of the longest path reaching it from any input in its fan-in.
+	// Altitude of a LUT (for lack of a better term): length of the longest path from it to any output in its fan-out.
+	void update_lut_depths_altitudes(pool<RTLIL::SigBit> worklist = {}, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		compute_lut_distances(lut_depths, /*forward=*/true, worklist, changed);
+		compute_lut_distances(lut_altitudes, /*forward=*/false, worklist, changed);
+		if (!debug_relax || worklist.empty())
+			return; // the cross-check below runs only for incremental updates when debug_relax is set
+		check_lut_distances(lut_depths, /*forward=*/true);
+		check_lut_distances(lut_altitudes, /*forward=*/false);
+	}
+
+	// LUT critical output set is the set of outputs whose depth will increase (equivalently, slack will decrease) if the depth of
+	// the LUT increases. (This is referred to as RPOv for LUTv in the paper.)
+	void compute_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs, // entries are (re)computed for the nodes in `worklist`
+	                                  pool<RTLIL::SigBit> worklist = {}) // empty means "all of lut_nodes"
+	{
+		if (worklist.empty())
+			worklist = lut_nodes;
+
+		while (!worklist.empty()) // repeated sweeps; each sweep resolves the nodes whose critical successors are already computed
+		{
+			bool updated_some = false;
+			for (auto lut : worklist) // NOTE(review): worklist.erase(lut) below runs during this iteration — relies on hashlib pool semantics, confirm
+			{
+				if (outputs[lut])
+					lut_critical_outputs[lut] = {lut}; // an output is its own critical output
+				else
+				{
+					bool all_succ_computed = true;
+					lut_critical_outputs[lut] = {};
+					for (auto lut_succ : lut_edges_fw[lut])
+					{
+						if (lut_nodes[lut_succ] && lut_depths[lut_succ] == lut_depths[lut] + 1) // only successors exactly one level deeper are considered
+						{
+							if (lut_critical_outputs.count(lut_succ))
+								lut_critical_outputs[lut].insert(lut_critical_outputs[lut_succ].begin(), lut_critical_outputs[lut_succ].end());
+							else
+							{
+								all_succ_computed = false;
+								break;
+							}
+						}
+					}
+					if (!all_succ_computed)
+					{
+						lut_critical_outputs.erase(lut); // drop the partial set and retry this node in a later sweep
+						continue;
+					}
+				}
+				worklist.erase(lut);
+				updated_some = true;
+			}
+			log_assert(updated_some); // a sweep without progress would loop forever
+		}
+	}
+
+	// Invalidation of critical output sets has to be conservative: raising the depth of one LUT may move other, adjacent LUTs off the
+	// critical path to an output. Therefore, for every LUT in the worklist, the sets of all realized LUTs in its input cone are dropped
+	// as well. Returns every LUT whose set was invalidated.
+	pool<RTLIL::SigBit> invalidate_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs,
+	                                                    pool<RTLIL::SigBit> worklist)
+	{
+		pool<RTLIL::SigBit> invalidated;
+		while (worklist.size() > 0)
+		{
+			auto cur = worklist.pop();
+			invalidated.insert(cur);
+			lut_critical_outputs.erase(cur);
+			for (auto pred : lut_edges_bw[cur])
+			{
+				if (!lut_nodes.count(pred) || invalidated.count(pred)) // skip unrealized and already handled predecessors
+					continue;
+				invalidated.insert(pred);
+				worklist.insert(pred);
+			}
+		}
+
+		return invalidated;
+	}
+
+	void check_lut_critical_outputs(const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs) // Self-check: asserts the given sets match a from-scratch recomputation for every realized LUT.
+	{
+		dict<RTLIL::SigBit, pool<RTLIL::SigBit>> gold_lut_critical_outputs; // reference computed over all of lut_nodes
+		compute_lut_critical_outputs(gold_lut_critical_outputs);
+		for (auto lut_critical_output : lut_critical_outputs)
+			if (lut_nodes[lut_critical_output.first]) // entries for nodes not in lut_nodes are not compared
+				log_assert(lut_critical_output.second == gold_lut_critical_outputs[lut_critical_output.first]);
+	}
+
+	void update_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs, // Full recomputation when `worklist` is empty,
+	                                 pool<RTLIL::SigBit> worklist = {}) // otherwise invalidate-and-recompute for the affected LUTs only.
+	{
+		if (worklist.empty())
+		{
+			compute_lut_critical_outputs(lut_critical_outputs);
+			return;
+		}
+		pool<RTLIL::SigBit> invalidated = invalidate_lut_critical_outputs(lut_critical_outputs, worklist);
+		compute_lut_critical_outputs(lut_critical_outputs, invalidated);
+		check_lut_critical_outputs(lut_critical_outputs); // NOTE(review): runs unconditionally, unlike the debug_relax-gated distance check — confirm intent
+	}
+
+	void update_breaking_node_potentials(dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> &potentials, // Fills potentials[lut][gate] for every realized LUT that has no entry yet.
+	                                     const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs)
+	{
+		for (auto lut : lut_nodes)
+		{
+			if (potentials.count(lut)) // still valid from an earlier round
+				continue;
+			if (lut_gates[lut].size() == 1 || lut_slacks[lut] == 0) // single-gate LUTs and LUTs without slack are never broken
+				continue;
+
+			if (debug_relax)
+				log("  Computing potentials for LUT %s.\n", log_signal(lut));
+
+			for (auto lut_gate : lut_gates[lut])
+			{
+				if (lut == lut_gate) // the root gate itself is not a breaking candidate
+					continue;
+
+				if (debug_relax)
+					log("    Considering breaking node %s.\n", log_signal(lut_gate));
+
+				int r_ex, r_im, r_slk; // the three terms of the potential function computed at the end of this loop body
+
+				auto cut_inputs = cut_lut_at_gate(lut, lut_gate);
+				pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second;
+				if (gate_inputs.empty() && (int)other_inputs.size() == order) // no input is freed, and lut_gate would become one more input
+				{
+					if (debug_relax)
+						log("      Breaking would result in a (k+1)-LUT.\n");
+					continue;
+				}
+
+				pool<RTLIL::SigBit> elim_fanin_luts; // freed inputs whose only LUT successor is `lut`
+				for (auto gate_input : gate_inputs)
+				{
+					if (lut_edges_fw[gate_input].size() == 1)
+					{
+						log_assert(lut_edges_fw[gate_input][lut]);
+						elim_fanin_luts.insert(gate_input);
+					}
+				}
+				if (debug_relax)
+				{
+					if (!lut_nodes[lut_gate])
+						log("      Breaking requires a new LUT.\n");
+					if (!gate_inputs.empty())
+					{
+						log("      Breaking eliminates LUT inputs");
+						for (auto gate_input : gate_inputs)
+							log(" %s", log_signal(gate_input));
+						log(".\n");
+					}
+					if (!elim_fanin_luts.empty())
+					{
+						log("      Breaking eliminates fan-in LUTs");
+						for (auto elim_fanin_lut : elim_fanin_luts)
+							log(" %s", log_signal(elim_fanin_lut));
+						log(".\n");
+					}
+				}
+				r_ex = (lut_nodes[lut_gate] ? 0 : -1) + elim_fanin_luts.size(); // eliminated fan-in LUTs, minus one if lut_gate must be newly realized
+
+				pool<pair<RTLIL::SigBit, RTLIL::SigBit>> maybe_mergeable_luts; // (predecessor, successor) pairs that might merge after the break
+
+				// Try to merge LUTv with one of its successors.
+				RTLIL::SigBit last_lut_succ;
+				int fanout = 0;
+				for (auto lut_succ : lut_edges_fw[lut])
+				{
+					if (lut_nodes[lut_succ])
+					{
+						fanout++;
+						last_lut_succ = lut_succ;
+					}
+				}
+				if (fanout == 1) // exactly one realized successor
+					maybe_mergeable_luts.insert({lut, last_lut_succ});
+
+				// Try to merge LUTv with one of its predecessors.
+				for (auto lut_pred : other_inputs)
+				{
+					int fanout = 0;
+					for (auto lut_pred_succ : lut_edges_fw[lut_pred])
+						if (lut_nodes[lut_pred_succ] || lut_pred_succ == lut_gate) // lut_gate counts as a successor even if not realized yet
+							fanout++;
+					if (fanout == 1)
+						maybe_mergeable_luts.insert({lut_pred, lut});
+				}
+
+				// Try to merge LUTw with one of its predecessors.
+				for (auto lut_gate_pred : lut_edges_bw[lut_gate])
+				{
+					int fanout = 0;
+					for (auto lut_gate_pred_succ : lut_edges_fw[lut_gate_pred])
+						if (lut_nodes[lut_gate_pred_succ] || lut_gate_pred_succ == lut_gate)
+							fanout++;
+					if (fanout == 1)
+						maybe_mergeable_luts.insert({lut_gate_pred, lut_gate});
+				}
+
+				r_im = 0; // number of candidate pairs whose combined realized inputs fit into `order`
+				for (auto maybe_mergeable_pair : maybe_mergeable_luts)
+				{
+					log_assert(lut_edges_fw[maybe_mergeable_pair.first][maybe_mergeable_pair.second]);
+					pool<RTLIL::SigBit> unique_inputs;
+					for (auto fst_lut_pred : lut_edges_bw[maybe_mergeable_pair.first])
+						if (lut_nodes[fst_lut_pred])
+							unique_inputs.insert(fst_lut_pred);
+					for (auto snd_lut_pred : lut_edges_bw[maybe_mergeable_pair.second])
+						if (lut_nodes[snd_lut_pred])
+							unique_inputs.insert(snd_lut_pred);
+					unique_inputs.erase(maybe_mergeable_pair.first); // the edge between the two LUTs would become internal
+					if ((int)unique_inputs.size() <= order)
+					{
+						if (debug_relax)
+							log("      Breaking may allow merging %s and %s.\n",
+							    log_signal(maybe_mergeable_pair.first), log_signal(maybe_mergeable_pair.second));
+						r_im++;
+					}
+				}
+
+				int lut_gate_depth;
+				if (lut_nodes[lut_gate])
+					lut_gate_depth = lut_depths[lut_gate];
+				else
+				{
+					lut_gate_depth = 0; // not realized yet: derive the depth from its LUT predecessors
+					for (auto lut_gate_pred : lut_edges_bw[lut_gate])
+						lut_gate_depth = max(lut_gate_depth, lut_depths[lut_gate_pred] + 1);
+				}
+				if (lut_depths[lut] >= lut_gate_depth + 1) // breaking does not deepen `lut`
+					r_slk = 0;
+				else
+				{
+					int depth_delta = lut_gate_depth + 1 - lut_depths[lut];
+					if (depth_delta > lut_slacks[lut])
+					{
+						if (debug_relax)
+							log("      Breaking would increase depth by %d, which is more than available slack.\n", depth_delta);
+						continue;
+					}
+
+					if (debug_relax)
+					{
+						log("      Breaking increases depth of LUT by %d.\n", depth_delta);
+						if (lut_critical_outputs.at(lut).size())
+						{
+							log("      Breaking decreases slack of outputs");
+							for (auto lut_critical_output : lut_critical_outputs.at(lut))
+							{
+								log(" %s", log_signal(lut_critical_output));
+								log_assert(lut_slacks[lut_critical_output] > 0);
+							}
+							log(".\n");
+						}
+					}
+					r_slk = lut_critical_outputs.at(lut).size() * depth_delta; // slack consumed across all critical outputs
+				}
+
+				int p = 100 * (r_alpha * r_ex + r_beta * r_im + r_gamma) / (r_slk + 1); // integer potential; +1 keeps the divisor nonzero when r_slk is 0
+				if (debug_relax)
+					log("    Potential for breaking node %s: %d (Rex=%d, Rim=%d, Rslk=%d).\n",
+					    log_signal(lut_gate), p, r_ex, r_im, r_slk);
+				potentials[lut][lut_gate] = p;
+			}
+		}
+	}
+
+	bool relax_depth_for_bound(bool first, int depth_bound, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs) // Repeatedly breaks the LUT/gate pair with the best potential under depth_bound; returns true if nothing could be broken on a non-first round.
+	{
+		size_t initial_count = lut_nodes.size(); // LUT count before this round, for the summary line
+
+		for (auto node : lut_nodes)
+		{
+			lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]);
+			log_assert(lut_slacks[node] >= 0);
+		}
+		if (debug)
+		{
+			dump_dot_lut_graph(stringf("flowmap-relax-%d-initial.dot", depth_bound), GraphMode::Slack);
+			log("  Dumped initial slack graph to `flowmap-relax-%d-initial.dot`.\n", depth_bound);
+		}
+
+		dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> potentials; // potentials[lut][gate], refreshed lazily each iteration
+		for (int break_num = 1; ; break_num++)
+		{
+			update_breaking_node_potentials(potentials, lut_critical_outputs);
+
+			if (potentials.empty())
+			{
+				log("  Relaxed to %zu (%+d) LUTs.\n", lut_nodes.size(), (int)lut_nodes.size() - (int)initial_count); // signed delta: the count may shrink
+				if (!first && break_num == 1) // nothing to break even with the larger bound
+				{
+					log("  Design fully relaxed.\n");
+					return true;
+				}
+				else
+				{
+					log("  Slack exhausted.\n");
+					break;
+				}
+			}
+
+			RTLIL::SigBit breaking_lut, breaking_gate;
+			int best_potential = INT_MIN;
+			for (auto lut_gate_potentials : potentials)
+			{
+				for (auto gate_potential : lut_gate_potentials.second)
+				{
+					if (gate_potential.second > best_potential) // strict comparison: the first maximum in iteration order wins
+					{
+						breaking_lut = lut_gate_potentials.first;
+						breaking_gate = gate_potential.first;
+						best_potential = gate_potential.second;
+					}
+				}
+			}
+			log("  Breaking LUT %s to %s LUT %s (potential %d).\n",
+			    log_signal(breaking_lut), lut_nodes[breaking_gate] ? "reuse" : "extract", log_signal(breaking_gate), best_potential);
+
+			if (debug_relax)
+				log("    Removing breaking gate %s from LUT.\n", log_signal(breaking_gate));
+			lut_gates[breaking_lut].erase(breaking_gate);
+
+			auto cut_inputs = cut_lut_at_gate(breaking_lut, breaking_gate);
+			pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second;
+
+			pool<RTLIL::SigBit> worklist = lut_gates[breaking_lut];
+			pool<RTLIL::SigBit> elim_gates = gate_inputs;
+			while (!worklist.empty()) // drop gates from the LUT when all of their predecessors are in elim_gates
+			{
+				auto lut_gate = worklist.pop();
+				bool all_gate_preds_elim = true;
+				for (auto lut_gate_pred : edges_bw[lut_gate])
+					if (!elim_gates[lut_gate_pred])
+						all_gate_preds_elim = false;
+				if (all_gate_preds_elim)
+				{
+					if (debug_relax)
+						log("    Removing gate %s from LUT.\n", log_signal(lut_gate));
+					lut_gates[breaking_lut].erase(lut_gate);
+					for (auto lut_gate_succ : edges_fw[lut_gate])
+						worklist.insert(lut_gate_succ);
+				}
+			}
+			log_assert(!lut_gates[breaking_lut].empty());
+
+			pool<RTLIL::SigBit> directly_affected_nodes = {breaking_lut}; // nodes whose LUT edges or realization changed
+			for (auto gate_input : gate_inputs)
+			{
+				if (debug_relax)
+					log("    Removing LUT edge %s -> %s.\n", log_signal(gate_input), log_signal(breaking_lut));
+				remove_lut_edge(gate_input, breaking_lut, &directly_affected_nodes);
+			}
+			if (debug_relax)
+				log("    Adding LUT edge %s -> %s.\n", log_signal(breaking_gate), log_signal(breaking_lut));
+			add_lut_edge(breaking_gate, breaking_lut, &directly_affected_nodes);
+
+			if (debug_relax)
+				log("  Updating slack and potentials.\n");
+
+			pool<RTLIL::SigBit> indirectly_affected_nodes = {}; // nodes whose depth or altitude changed as a consequence
+			update_lut_depths_altitudes(directly_affected_nodes, &indirectly_affected_nodes);
+			update_lut_critical_outputs(lut_critical_outputs, indirectly_affected_nodes);
+			for (auto node : indirectly_affected_nodes)
+			{
+				lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]);
+				log_assert(lut_slacks[node] >= 0);
+				if (debug_relax)
+					log("    LUT %s now has depth %d and slack %d.\n", log_signal(node), lut_depths[node], lut_slacks[node]);
+			}
+
+			worklist = indirectly_affected_nodes;
+			pool<RTLIL::SigBit> visited;
+			while (!worklist.empty())
+			{
+				auto node = worklist.pop();
+				visited.insert(node);
+				potentials.erase(node);
+				// We are invalidating the entire output cone of the gate IR node, not just of the LUT IR node. This is done to also invalidate
+				// all LUTs that could contain one of the indirectly affected nodes as a *part* of them, as they may not be in the output cone
+				// of any of the LUT IR nodes, e.g. if we have a LUT IR node A and node B as predecessors of node C, where node B includes all
+				// gates from node A.
+				for (auto node_succ : edges_fw[node])
+					if (!visited[node_succ])
+						worklist.insert(node_succ);
+			}
+
+			if (debug)
+			{
+				dump_dot_lut_graph(stringf("flowmap-relax-%d-break-%d.dot", depth_bound, break_num), GraphMode::Slack);
+				log("  Dumped slack graph after break %d to `flowmap-relax-%d-break-%d.dot`.\n",  break_num, depth_bound, break_num);
+			}
+		}
+
+		return false;
+	}
+
+	void optimize_area(int depth, int optarea) // Runs relaxation rounds for depth bounds depth .. depth+optarea, stopping early once a round reports full relaxation.
+	{
+		dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_critical_outputs;
+		update_lut_depths_altitudes();
+		update_lut_critical_outputs(lut_critical_outputs);
+
+		const int last_bound = depth + optarea;
+		for (int bound = depth; bound <= last_bound; bound++)
+		{
+			log("Relaxing with depth bound %d.\n", bound);
+			const bool is_first_round = (bound == depth);
+			if (relax_depth_for_bound(is_first_round, bound, lut_critical_outputs))
+				break;
+		}
+	}
+
+	void pack_cells(int minlut) // Emits one LUT cell per node in lut_nodes (truth table obtained via ConstEval) and detaches the original drivers of those nodes.
+	{
+		ConstEval ce(module);
+		for (auto input_node : inputs)
+			ce.stop(input_node); // presumably keeps evaluation from descending past primary inputs — confirm against ConstEval
+
+		pool<RTLIL::SigBit> mapped_nodes; // nodes that now have a LUT driver; their old drivers are rerouted at the end
+		for (auto node : lut_nodes)
+		{
+			if (node_origins.count(node))
+			{
+				auto origin = node_origins[node];
+				if (origin.cell->getPort(origin.port).size() == 1)
+					log("Packing %s.%s.%s (%s).\n",
+					    log_id(module), log_id(origin.cell), origin.port.c_str(), log_signal(node));
+				else
+					log("Packing %s.%s.%s [%d] (%s).\n",
+					    log_id(module), log_id(origin.cell), origin.port.c_str(), origin.offset, log_signal(node));
+			}
+			else
+			{
+				log("Packing %s.%s.\n", log_id(module), log_signal(node));
+			}
+
+			for (auto gate_node : lut_gates[node]) // log-only pass over the gates absorbed into this LUT
+			{
+				log_assert(node_origins.count(gate_node));
+
+				if (gate_node == node)
+					continue;
+
+				auto gate_origin = node_origins[gate_node];
+				if (gate_origin.cell->getPort(gate_origin.port).size() == 1)
+					log("  Packing %s.%s.%s (%s).\n",
+					    log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), log_signal(gate_node));
+				else
+					log("  Packing %s.%s.%s [%d] (%s).\n",
+					    log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), gate_origin.offset, log_signal(gate_node));
+			}
+
+			vector<RTLIL::SigBit> input_nodes(lut_edges_bw[node].begin(), lut_edges_bw[node].end()); // fixes the bit order of the LUT inputs
+			RTLIL::Const lut_table(State::Sx, max(1 << input_nodes.size(), 1 << minlut)); // entries beyond 2^inputs stay Sx when padded up to minlut
+			for (unsigned i = 0; i < (1 << input_nodes.size()); i++) // enumerate every input assignment; bit n of i drives input_nodes[n]
+			{
+				ce.push();
+				for (size_t n = 0; n < input_nodes.size(); n++)
+					ce.set(input_nodes[n], ((i >> n) & 1) ? State::S1 : State::S0);
+
+				RTLIL::SigSpec value = node, undef;
+				if (!ce.eval(value, undef))
+				{
+					string env;
+					for (auto input_node : input_nodes)
+						env += stringf("  %s = %s\n", log_signal(input_node), log_signal(ce.values_map(input_node)));
+					log_error("Cannot evaluate %s because %s is not defined.\nEvaluation environment:\n%s",
+					          log_signal(node), log_signal(undef), env.c_str());
+				}
+
+				lut_table[i] = value.as_bool() ? State::S1 : State::S0;
+				ce.pop();
+			}
+
+			RTLIL::SigSpec lut_a, lut_y = node;
+			for (auto input_node : input_nodes)
+				lut_a.append_bit(input_node);
+			lut_a.append(RTLIL::Const(State::Sx, minlut - input_nodes.size())); // NOTE(review): width is negative when the LUT has more than minlut inputs; presumably yields an empty constant — confirm
+
+			RTLIL::Cell *lut = module->addLut(NEW_ID, lut_a, lut_y, lut_table);
+			mapped_nodes.insert(node);
+			for (auto gate_node : lut_gates[node])
+			{
+				auto gate_origin = node_origins[gate_node];
+				lut->add_strpool_attribute("\\src", gate_origin.cell->get_strpool_attribute("\\src")); // carry over source locations of the packed gates
+				packed_count++; // a gate is counted once per LUT that contains it
+			}
+			lut_count++;
+			lut_area += lut_table.size();
+
+			if ((int)input_nodes.size() >= minlut)
+				log("  Packed into a %zu-LUT %s.%s.\n", input_nodes.size(), log_id(module), log_id(lut));
+			else
+				log("  Packed into a %zu-LUT %s.%s (implemented as %d-LUT).\n", input_nodes.size(), log_id(module), log_id(lut), minlut);
+		}
+
+		for (auto node : mapped_nodes)
+		{
+			auto origin = node_origins[node];
+			RTLIL::SigSpec driver = origin.cell->getPort(origin.port);
+			driver[origin.offset] = module->addWire(NEW_ID); // reroute the original driver bit to a fresh wire so only the LUT drives the node
+			origin.cell->setPort(origin.port, driver);
+		}
+	}
+
+	FlowmapWorker(int order, int minlut, pool<IdString> cell_types, int r_alpha, int r_beta, int r_gamma, // Runs the whole mapping flow for one module from within the constructor.
+	              bool relax, int optarea, bool debug, bool debug_relax,
+	              RTLIL::Module *module) :
+		order(order), r_alpha(r_alpha), r_beta(r_beta), r_gamma(r_gamma), debug(debug), debug_relax(debug_relax),
+		module(module), sigmap(module), index(module)
+	{
+		log("Labeling cells.\n");
+		discover_nodes(cell_types); // build the gate graph
+		label_nodes(); // label every node and record candidate LUTs
+		int depth = map_luts(); // realize the LUTs needed by the outputs
+
+		if (relax) // optional area optimization
+		{
+			log("\n");
+			log("Optimizing area.\n");
+			optimize_area(depth, optarea);
+		}
+
+		log("\n");
+		log("Packing cells.\n");
+		pack_cells(minlut); // emit the LUT cells into the module
+	}
+};
+
+static void split(std::vector<std::string> &tokens, const std::string &text, char sep) // Appends the `sep`-separated fields of `text` to `tokens`; empty fields are kept.
+{
+	std::string::size_type begin = 0;
+	for (auto cut = text.find(sep); cut != std::string::npos; cut = text.find(sep, begin)) {
+		tokens.emplace_back(text, begin, cut - begin);
+		begin = cut + 1;
+	}
+	tokens.emplace_back(text, begin); // trailing field; an empty string when text is empty or ends in sep
+}
+
+struct FlowmapPass : public Pass { // Yosys command `flowmap`: parses options and runs a FlowmapWorker on every selected module.
+	FlowmapPass() : Pass("flowmap", "pack LUTs with FlowMap") { }
+	void help() YS_OVERRIDE
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    flowmap [options] [selection]\n");
+		log("\n");
+		log("This pass uses the FlowMap technology mapping algorithm to pack logic gates\n");
+		log("into k-LUTs with optimal depth. It allows mapping any circuit elements that can\n");
+		log("be evaluated with the `eval` pass, including cells with multiple output ports\n");
+		log("and multi-bit input and output ports.\n");
+		log("\n");
+		log("    -maxlut k\n");
+		log("        perform technology mapping for a k-LUT architecture. if not specified,\n");
+		log("        defaults to 3.\n");
+		log("\n");
+		log("    -minlut n\n");
+		log("        only produce n-input or larger LUTs. if not specified, defaults to 1.\n");
+		log("\n");
+		log("    -cells <cell>[,<cell>,...]\n");
+		log("        map specified cells. if not specified, maps $_NOT_, $_AND_, $_OR_,\n");
+		log("        $_XOR_ and $_MUX_, which are the outputs of the `simplemap` pass.\n");
+		log("\n");
+		log("    -relax\n");
+		log("        perform depth relaxation and area minimization.\n");
+		log("\n");
+		log("    -r-alpha n, -r-beta n, -r-gamma n\n");
+		log("        parameters of depth relaxation heuristic potential function.\n");
+		log("        if not specified, alpha=8, beta=2, gamma=1.\n");
+		log("\n");
+		log("    -optarea n\n");
+		log("        optimize for area by trading off at most n logic levels for fewer LUTs.\n");
+		log("        n may be zero, to optimize for area without increasing depth.\n");
+		log("        implies -relax.\n");
+		log("\n");
+		log("    -debug\n");
+		log("        dump intermediate graphs.\n");
+		log("\n");
+		log("    -debug-relax\n");
+		log("        explain decisions performed during depth relaxation.\n");
+		log("\n");
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+	{
+		int order = 3; // -maxlut
+		int minlut = 1;
+		vector<string> cells;
+		bool relax = false;
+		int r_alpha = 8, r_beta = 2, r_gamma = 1;
+		int optarea = 0;
+		bool debug = false, debug_relax = false;
+
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++) // NOTE(review): numeric options go through atoi() without validation
+		{
+			if (args[argidx] == "-maxlut" && argidx + 1 < args.size())
+			{
+				order = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-minlut" && argidx + 1 < args.size())
+			{
+				minlut = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-cells" && argidx + 1 < args.size())
+			{
+				split(cells, args[++argidx], ',');
+				continue;
+			}
+			if (args[argidx] == "-relax")
+			{
+				relax = true;
+				continue;
+			}
+			if (args[argidx] == "-r-alpha" && argidx + 1 < args.size())
+			{
+				r_alpha = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-r-beta" && argidx + 1 < args.size())
+			{
+				r_beta = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-r-gamma" && argidx + 1 < args.size())
+			{
+				r_gamma = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-optarea" && argidx + 1 < args.size())
+			{
+				relax = true; // -optarea implies -relax
+				optarea = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-debug")
+			{
+				debug = true;
+				continue;
+			}
+			if (args[argidx] == "-debug-relax")
+			{
+				debug = debug_relax = true; // -debug-relax also enables -debug
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+		pool<IdString> cell_types;
+		if (!cells.empty())
+		{
+			for (auto &cell : cells)
+				cell_types.insert(cell);
+		}
+		else
+		{
+			cell_types = {"$_NOT_", "$_AND_", "$_OR_", "$_XOR_", "$_MUX_"};
+		}
+
+		const char *algo_r = relax ? "-r" : "";
+		log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap%s).\n", algo_r);
+
+		int gate_count = 0, lut_count = 0, packed_count = 0, gate_area = 0, lut_area = 0; // totals accumulated over all selected modules
+		for (auto module : design->selected_modules())
+		{
+			FlowmapWorker worker(order, minlut, cell_types, r_alpha, r_beta, r_gamma, relax, optarea, debug, debug_relax, module);
+			gate_count += worker.gate_count;
+			lut_count += worker.lut_count;
+			packed_count += worker.packed_count;
+			gate_area += worker.gate_area;
+			lut_area += worker.lut_area;
+		}
+
+		log("\n");
+		log("Packed %d cells (%d of them duplicated) into %d LUTs.\n", packed_count, packed_count - gate_count, lut_count);
+		if (gate_area > 0) // without any matching cells the ratio would be 0/0 and print as "nan%"
+			log("Solution takes %.1f%% of original gate area.\n", lut_area * 100.0 / gate_area);
+	}
+} FlowmapPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/tests/flowmap/flow.v b/passes/tests/flowmap/flow.v
new file mode 100644
index 000000000..297ef910e
--- /dev/null
+++ b/passes/tests/flowmap/flow.v
@@ -0,0 +1,22 @@
+// Exact reproduction of Figure 2(a) from 10.1109/43.273754.
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f;
+	wire nA = b&c; // uninverted half of A
+	wire A = !nA; // A = !(b&c)
+	wire nB = c|d; // uninverted half of B
+	wire B = !nB; // B = !(c|d); B feeds both D and nG
+	wire nC = e&f; // uninverted half of C
+	wire C = !nC; // C = !(e&f); C feeds both nF and J
+	wire D = A|B; // D feeds both E and nF
+	wire E = a&D;
+	wire nF = D&C; // uninverted half of F
+	wire F = !nF; // F = !(D&C); F feeds both nG and H
+	wire nG = F|B; // uninverted half of G
+	wire G = !nG; // G = !(F|B); G feeds both I and J
+	wire H = a&F;
+	wire I = E|G;
+	wire J = G&C;
+	wire np = H&I; // uninverted half of output p
+	output p = !np; // p = !(H&I)
+	output q = A|J;
+endmodule
diff --git a/passes/tests/flowmap/flowp.v b/passes/tests/flowmap/flowp.v
new file mode 100644
index 000000000..2fb40ffa4
--- /dev/null
+++ b/passes/tests/flowmap/flowp.v
@@ -0,0 +1,16 @@
+// Like flow.v, but results in a network identical to Figure 2(b).
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f;
+	wire A = b&c; // same gate topology as flow.v, with every inverter omitted
+	wire B = c|d; // B feeds both D and G
+	wire C = e&f; // C feeds both F and J
+	wire D = A|B; // D feeds both E and F
+	wire E = a&D;
+	wire F = D&C; // F feeds both G and H
+	wire G = F|B; // G feeds both I and J
+	wire H = a&F;
+	wire I = E|G;
+	wire J = G&C;
+	output p = H&I;
+	output q = A|J;
+endmodule
diff --git a/passes/tests/flowmap/pack1.v b/passes/tests/flowmap/pack1.v
new file mode 100644
index 000000000..9454edf3c
--- /dev/null
+++ b/passes/tests/flowmap/pack1.v
@@ -0,0 +1,11 @@
+// Exact reproduction of Figure 3(a) from 10.1109/92.285741.
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f,g,h;
+	wire x = !(c|d); // every gate in this module is an inverting (NOR/NAND) gate
+	wire y = !(e&f);
+	wire u = !(a&b);
+	wire v = !(x|y); // v is shared: it feeds both outputs s and t
+	wire w = !(g&h);
+	output s = !(u|v);
+	output t = !(v|w);
+endmodule
diff --git a/passes/tests/flowmap/pack1p.v b/passes/tests/flowmap/pack1p.v
new file mode 100644
index 000000000..fdb278833
--- /dev/null
+++ b/passes/tests/flowmap/pack1p.v
@@ -0,0 +1,11 @@
+// Like pack1.v, but results in a simpler network.
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f,g,h;
+	wire x = c|d; // same gate topology as pack1.v, with every inversion omitted
+	wire y = e&f;
+	wire u = a&b;
+	wire v = x|y; // v is shared: it feeds both outputs s and t
+	wire w = g&h;
+	output s = u|v;
+	output t = v|w;
+endmodule
diff --git a/passes/tests/flowmap/pack2.v b/passes/tests/flowmap/pack2.v
new file mode 100644
index 000000000..445e4afb0
--- /dev/null
+++ b/passes/tests/flowmap/pack2.v
@@ -0,0 +1,15 @@
+// Exact reproduction of Figure 4(a) from 10.1109/92.285741.
+module top(...); // ports are declared by the input/output statements below
+	(* $flowmap_level=1 *) input a; // NOTE(review): $flowmap_level presumably seeds this input's level for the flowmap pass - confirm in flowmap.cc
+	(* $flowmap_level=1 *) input b;
+	(* $flowmap_level=2 *) input c;
+	(* $flowmap_level=1 *) input d;
+	(* $flowmap_level=3 *) input e; // e carries the largest $flowmap_level value of all inputs
+	(* $flowmap_level=1 *) input f;
+	wire u = !(a&b); // every gate in this module is an inverting (NOR/NAND) gate
+	wire w = !(c|d); // w is shared: it feeds both v and n0
+	wire v = !(u|w);
+	wire n0 = !(w&e);
+	wire n1 = !(n0|f);
+	output n2 = !(v&n1); // the only output; it joins the v path and the n0/n1 path
+endmodule
diff --git a/passes/tests/flowmap/pack2p.v b/passes/tests/flowmap/pack2p.v
new file mode 100644
index 000000000..d4b41733d
--- /dev/null
+++ b/passes/tests/flowmap/pack2p.v
@@ -0,0 +1,15 @@
+// Like pack2.v, but results in a simpler network.
+module top(...); // ports are declared by the input/output statements below
+	(* $flowmap_level=1 *) input a; // NOTE(review): $flowmap_level presumably seeds this input's level for the flowmap pass - confirm in flowmap.cc
+	(* $flowmap_level=1 *) input b;
+	(* $flowmap_level=2 *) input c;
+	(* $flowmap_level=1 *) input d;
+	(* $flowmap_level=3 *) input e; // e carries the largest $flowmap_level value of all inputs
+	(* $flowmap_level=1 *) input f;
+	wire u = a&b; // same gate topology as pack2.v, with every inversion omitted
+	wire w = c|d; // w is shared: it feeds both v and n0
+	wire v = u|w;
+	wire n0 = w&e;
+	wire n1 = n0|f;
+	output n2 = v&n1; // the only output; it joins the v path and the n0/n1 path
+endmodule
diff --git a/passes/tests/flowmap/pack3.v b/passes/tests/flowmap/pack3.v
new file mode 100644
index 000000000..06147a1aa
--- /dev/null
+++ b/passes/tests/flowmap/pack3.v
@@ -0,0 +1,15 @@
+// Exact reproduction of Figure 5(a) (bottom) from 10.1109/92.285741.
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f,g,h,i,j;
+	wire x = !(a&b); // every gate in this module is an inverting (NOR/NAND) gate
+	wire y = !(c|d);
+	wire z = !(e|f);
+	wire n0 = !(g&h); // n0 is shared: it feeds both n2 and n3
+	wire n1 = !(i|j);
+	wire w = !(x&y); // w is shared: it feeds both v and u
+	wire n2 = !(z&n0);
+	wire n3 = !(n0|n1);
+	wire n4 = !(n2|n3); // collects the e..j cone
+	wire v = !(w|n4); // must read n4 (the only declared n* net left); an undeclared name here would leave the e..j cone unconnected
+	output u = !(w&v);
+endmodule
diff --git a/passes/tests/flowmap/pack3p.v b/passes/tests/flowmap/pack3p.v
new file mode 100644
index 000000000..bc6ac1757
--- /dev/null
+++ b/passes/tests/flowmap/pack3p.v
@@ -0,0 +1,15 @@
+// Like pack3.v, but results in a simpler network.
+module top(...); // ports are declared by the input/output statements below
+	input a,b,c,d,e,f,g,h,i,j;
+	wire x = a&b; // plain AND/OR gates only; no inversions in this variant
+	wire y = c|d;
+	wire z = e|f;
+	wire n0 = g&h; // n0 is shared: it feeds both n2 and n3
+	wire n1 = i|j;
+	wire w = x&y; // w is shared: it feeds both v and u
+	wire n2 = z&n0;
+	wire n3 = n0|n1;
+	wire n4 = n2|n3; // collects the e..j cone
+	wire v = w|n4; // must read n4 (the only declared n* net left); an undeclared name here would leave the e..j cone unconnected
+	output u = w&v;
+endmodule