aboutsummaryrefslogtreecommitdiffstats
path: root/passes/pmgen
diff options
context:
space:
mode:
Diffstat (limited to 'passes/pmgen')
-rw-r--r--passes/pmgen/.gitignore2
-rw-r--r--passes/pmgen/Makefile.inc13
-rw-r--r--passes/pmgen/README.md2
-rw-r--r--passes/pmgen/generate.h140
-rw-r--r--passes/pmgen/ice40_dsp.cc282
-rw-r--r--passes/pmgen/ice40_dsp.pmg625
-rw-r--r--passes/pmgen/ice40_wrapcarry.pmg4
-rw-r--r--passes/pmgen/peepopt.cc77
-rw-r--r--passes/pmgen/peepopt_dffmux.pmg171
-rw-r--r--passes/pmgen/pmgen.py31
-rw-r--r--passes/pmgen/test_pmgen.cc129
-rw-r--r--passes/pmgen/xilinx_dsp.cc646
-rw-r--r--passes/pmgen/xilinx_dsp.pmg725
-rw-r--r--passes/pmgen/xilinx_dsp_CREG.pmg234
-rw-r--r--passes/pmgen/xilinx_dsp_cascade.pmg427
-rw-r--r--passes/pmgen/xilinx_srl.pmg30
16 files changed, 3174 insertions, 364 deletions
diff --git a/passes/pmgen/.gitignore b/passes/pmgen/.gitignore
index 6b319b8c3..e52f3282f 100644
--- a/passes/pmgen/.gitignore
+++ b/passes/pmgen/.gitignore
@@ -1 +1 @@
-/*_pm.h \ No newline at end of file
+/*_pm.h
diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc
index 4989c582a..145d2ebf9 100644
--- a/passes/pmgen/Makefile.inc
+++ b/passes/pmgen/Makefile.inc
@@ -1,5 +1,5 @@
%_pm.h: passes/pmgen/pmgen.py %.pmg
- $(P) mkdir -p passes/pmgen && python3 $< -o $@ -p $(subst _pm.h,,$(notdir $@)) $(filter-out $<,$^)
+ $(P) mkdir -p passes/pmgen && $(PYTHON_EXECUTABLE) $< -o $@ -p $(subst _pm.h,,$(notdir $@)) $(filter-out $<,$^)
# --------------------------------------
@@ -21,15 +21,24 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h))
# --------------------------------------
+OBJS += passes/pmgen/xilinx_dsp.o
+passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h
+$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h))
+$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_CREG_pm.h))
+$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h))
+
+# --------------------------------------
+
OBJS += passes/pmgen/peepopt.o
passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h
$(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h))
PEEPOPT_PATTERN = passes/pmgen/peepopt_shiftmul.pmg
PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg
+PEEPOPT_PATTERN += passes/pmgen/peepopt_dffmux.pmg
passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN)
- $(P) mkdir -p passes/pmgen && python3 $< -o $@ -p peepopt $(filter-out $<,$^)
+ $(P) mkdir -p passes/pmgen && $(PYTHON_EXECUTABLE) $< -o $@ -p peepopt $(filter-out $<,$^)
# --------------------------------------
diff --git a/passes/pmgen/README.md b/passes/pmgen/README.md
index 2f5b8d0b2..39560839f 100644
--- a/passes/pmgen/README.md
+++ b/passes/pmgen/README.md
@@ -190,7 +190,7 @@ create matches for different sections of a cell. For example:
select pmux->type == $pmux
slice idx GetSize(port(pmux, \S))
index <SigBit> port(pmux, \S)[idx] === port(eq, \Y)
- set pmux_slice idx
+ set pmux_slice idx
endmatch
The first argument to `slice` is the local variable name used to identify the
diff --git a/passes/pmgen/generate.h b/passes/pmgen/generate.h
new file mode 100644
index 000000000..354583de5
--- /dev/null
+++ b/passes/pmgen/generate.h
@@ -0,0 +1,140 @@
+/*
+ * yosys -- Yosys Open SYnthesis Suite
+ *
+ * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef PMGEN_GENERATE
+#define PMGEN_GENERATE
+
+// Convenience wrapper around generate_pattern<pmclass>(): builds a lambda that
+// calls pm.run_<pattern>(f) and passes the stringified class and pattern names.
+// The expansion refers to a variable named `design`, which must be in scope
+// wherever the macro is used.
+#define GENERATE_PATTERN(pmclass, pattern) \
+ generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design)
+
+/*
+ * Expose every dangling signal bit of `module` as a module port.
+ *
+ * For each wire, the bits that no cell output drives are collected into a new
+ * input port named \i<N>, and the bits that no cell input reads are collected
+ * into a new output port named \o<N>. The two checks are independent, so a
+ * bit that is neither driven nor read ends up in both. module->fixup_ports()
+ * is called once all ports have been added.
+ */
+void pmtest_addports(Module *module)
+{
+	pool<SigBit> driven_bits, used_bits;
+	SigMap sigmap(module);
+	int icnt = 0, ocnt = 0;
+
+	// Record which (sigmapped) bits are read by a cell input and which are
+	// driven by a cell output. Connections are visited by reference so the
+	// SigSpec held by each entry is not copied on every iteration.
+	for (auto cell : module->cells())
+	for (auto &conn : cell->connections())
+	{
+		if (cell->input(conn.first))
+			for (auto bit : sigmap(conn.second))
+				used_bits.insert(bit);
+		if (cell->output(conn.first))
+			for (auto bit : sigmap(conn.second))
+				driven_bits.insert(bit);
+	}
+
+	// Iterate over a snapshot of the wire list: addWire() below adds wires to
+	// the module while this loop is running.
+	for (auto wire : vector<Wire*>(module->wires()))
+	{
+		SigSpec ibits, obits;
+		for (auto bit : sigmap(wire)) {
+			if (!used_bits.count(bit))
+				obits.append(bit);
+			if (!driven_bits.count(bit))
+				ibits.append(bit);
+		}
+		if (!ibits.empty()) {
+			// Undriven bits are fed from a fresh input port.
+			Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits));
+			w->port_input = true;
+			module->connect(ibits, w);
+		}
+		if (!obits.empty()) {
+			// Unread bits are routed to a fresh output port.
+			Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits));
+			w->port_output = true;
+			module->connect(w, obits);
+		}
+	}
+
+	module->fixup_ports();
+}
+
+// Generates test-case modules for one pattern of a pmgen pattern matcher.
+//
+//   pm       generated pattern-matcher class; this function uses its
+//            (module, cells) constructor, rng(), rngseed and generate_mode.
+//   run      runs the pattern on a matcher, invoking the passed function on
+//            every match.
+//   pmclass, pattern
+//            names used in log messages and in the generated module names.
+//   design   design that receives the generated modules.
+//
+// The loop runs until maxmodcnt (100) modules named
+// \pmtest_<pmclass>_<pattern>_<NNNNN> exist (or log_error() fires), then adds
+// one \pmtest_<pmclass>_<pattern> module that instantiates all of them.
+template <class pm>
+void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design)
+{
+ log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass);
+
+ int modcnt = 0;
+ int maxmodcnt = 100;
+ int maxsubcnt = 4;
+ int timeout = 0;
+ vector<Module*> mods;
+
+ while (modcnt < maxmodcnt)
+ {
+ int submodcnt = 0, itercnt = 0, cellcnt = 0;
+ // Scratch module for this round; removed again at the end of the round.
+ Module *mod = design->addModule(NEW_ID);
+
+ while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000)
+ {
+ // timeout counts iterations since the last match (reset to 0 below).
+ if (timeout++ > 10000)
+ log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n");
+
+ pm matcher(mod, mod->cells());
+
+ // Fold all loop counters into the matcher's RNG seed, stepping the RNG
+ // between additions.
+ // NOTE(review): presumably so each iteration makes different random
+ // choices -- confirm against the generated matcher class.
+ matcher.rng(1);
+ matcher.rngseed += modcnt;
+ matcher.rng(1);
+ matcher.rngseed += submodcnt;
+ matcher.rng(1);
+ matcher.rngseed += itercnt;
+ matcher.rng(1);
+ matcher.rngseed += cellcnt;
+ matcher.rng(1);
+
+ // Only re-check for a match when the cell count changed since the last check.
+ if (GetSize(mod->cells()) != cellcnt)
+ {
+ bool found_match = false;
+ run(matcher, [&](){ found_match = true; });
+ cellcnt = GetSize(mod->cells());
+
+ if (found_match) {
+ // Snapshot the scratch module as a numbered test module with ports.
+ Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d",
+ pmclass, pattern, modcnt++));
+ log("Creating module %s with %d cells.\n", log_id(m), cellcnt);
+ mod->cloneInto(m);
+ pmtest_addports(m);
+ mods.push_back(m);
+ submodcnt++;
+ timeout = 0;
+ }
+ }
+
+ // Second run with generate_mode set and a no-op match callback.
+ // NOTE(review): presumably this is where the matcher adds cells to mod --
+ // confirm in pmgen.py.
+ matcher.generate_mode = true;
+ run(matcher, [](){});
+ }
+
+ // After a round that produced at least one module, allow twice as many
+ // snapshots per scratch module (no further doubling once 1 << 16 is reached).
+ if (submodcnt && maxsubcnt < (1 << 16))
+ maxsubcnt *= 2;
+
+ design->remove(mod);
+ }
+
+ // Wrapper module: one cell per generated module, every port tied to a fresh
+ // wire, then dangling bits exposed as ports.
+ Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern));
+ log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods));
+ for (auto mod : mods) {
+ Cell *c = m->addCell(mod->name, mod->name);
+ for (auto port : mod->ports) {
+ Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port)));
+ c->setPort(port, w);
+ }
+ }
+ pmtest_addports(m);
+}
+
+#endif
diff --git a/passes/pmgen/ice40_dsp.cc b/passes/pmgen/ice40_dsp.cc
index 16bfe537f..f60e67158 100644
--- a/passes/pmgen/ice40_dsp.cc
+++ b/passes/pmgen/ice40_dsp.cc
@@ -29,19 +29,19 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
{
auto &st = pm.st_ice40_dsp;
-#if 0
- log("\n");
- log("ffA: %s\n", log_id(st.ffA, "--"));
- log("ffB: %s\n", log_id(st.ffB, "--"));
- log("mul: %s\n", log_id(st.mul, "--"));
- log("ffY: %s\n", log_id(st.ffY, "--"));
- log("addAB: %s\n", log_id(st.addAB, "--"));
- log("muxAB: %s\n", log_id(st.muxAB, "--"));
- log("ffS: %s\n", log_id(st.ffS, "--"));
-#endif
-
log("Checking %s.%s for iCE40 DSP inference.\n", log_id(pm.module), log_id(st.mul));
+ log_debug("ffA: %s %s %s\n", log_id(st.ffA, "--"), log_id(st.ffAholdmux, "--"), log_id(st.ffArstmux, "--"));
+ log_debug("ffB: %s %s %s\n", log_id(st.ffB, "--"), log_id(st.ffBholdmux, "--"), log_id(st.ffBrstmux, "--"));
+ log_debug("ffCD: %s %s\n", log_id(st.ffCD, "--"), log_id(st.ffCDholdmux, "--"));
+ log_debug("mul: %s\n", log_id(st.mul, "--"));
+ log_debug("ffFJKG: %s\n", log_id(st.ffFJKG, "--"));
+ log_debug("ffH: %s\n", log_id(st.ffH, "--"));
+ log_debug("add: %s\n", log_id(st.add, "--"));
+ log_debug("mux: %s\n", log_id(st.mux, "--"));
+ log_debug("ffO: %s %s %s\n", log_id(st.ffO, "--"), log_id(st.ffOholdmux, "--"), log_id(st.ffOrstmux, "--"));
+ log_debug("\n");
+
if (GetSize(st.sigA) > 16) {
log(" input A (%s) is too large (%d > 16).\n", log_signal(st.sigA), GetSize(st.sigA));
return;
@@ -52,59 +52,85 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
return;
}
- if (GetSize(st.sigS) > 32) {
- log(" accumulator (%s) is too large (%d > 32).\n", log_signal(st.sigS), GetSize(st.sigS));
+ if (GetSize(st.sigO) > 33) {
+ log(" adder/accumulator (%s) is too large (%d > 33).\n", log_signal(st.sigO), GetSize(st.sigO));
return;
}
- if (GetSize(st.sigY) > 32) {
- log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigY), GetSize(st.sigY));
+ if (GetSize(st.sigH) > 32) {
+ log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigH), GetSize(st.sigH));
return;
}
- bool mul_signed = st.mul->getParam("\\A_SIGNED").as_bool();
+ Cell *cell = st.mul;
+ if (cell->type == ID($mul)) {
+ log(" replacing %s with SB_MAC16 cell.\n", log_id(st.mul->type));
- log(" replacing $mul with SB_MAC16 cell.\n");
-
- Cell *cell = pm.module->addCell(NEW_ID, "\\SB_MAC16");
- pm.module->swap_names(cell, st.mul);
+ cell = pm.module->addCell(NEW_ID, ID(SB_MAC16));
+ pm.module->swap_names(cell, st.mul);
+ }
+ else log_assert(cell->type == ID(SB_MAC16));
// SB_MAC16 Input Interface
-
SigSpec A = st.sigA;
- A.extend_u0(16, mul_signed);
+ A.extend_u0(16, st.mul->getParam(ID(A_SIGNED)).as_bool());
+ log_assert(GetSize(A) == 16);
SigSpec B = st.sigB;
- B.extend_u0(16, mul_signed);
-
- SigSpec CD;
- if (st.muxA)
- CD = st.muxA->getPort("\\B");
- if (st.muxB)
- CD = st.muxB->getPort("\\A");
- CD.extend_u0(32, mul_signed);
+ B.extend_u0(16, st.mul->getParam(ID(B_SIGNED)).as_bool());
+ log_assert(GetSize(B) == 16);
- cell->setPort("\\A", A);
- cell->setPort("\\B", B);
- cell->setPort("\\C", CD.extract(0, 16));
- cell->setPort("\\D", CD.extract(16, 16));
+ SigSpec CD = st.sigCD;
+ if (CD.empty())
+ CD = RTLIL::Const(0, 32);
+ else
+ log_assert(GetSize(CD) == 32);
- cell->setParam("\\A_REG", st.ffA ? State::S1 : State::S0);
- cell->setParam("\\B_REG", st.ffB ? State::S1 : State::S0);
+ cell->setPort(ID::A, A);
+ cell->setPort(ID::B, B);
+ cell->setPort(ID(C), CD.extract(16, 16));
+ cell->setPort(ID(D), CD.extract(0, 16));
- cell->setPort("\\AHOLD", State::S0);
- cell->setPort("\\BHOLD", State::S0);
- cell->setPort("\\CHOLD", State::S0);
- cell->setPort("\\DHOLD", State::S0);
+ cell->setParam(ID(A_REG), st.ffA ? State::S1 : State::S0);
+ cell->setParam(ID(B_REG), st.ffB ? State::S1 : State::S0);
+ cell->setParam(ID(C_REG), st.ffCD ? State::S1 : State::S0);
+ cell->setParam(ID(D_REG), st.ffCD ? State::S1 : State::S0);
- cell->setPort("\\IRSTTOP", State::S0);
- cell->setPort("\\IRSTBOT", State::S0);
+ SigSpec AHOLD, BHOLD, CDHOLD;
+ if (st.ffAholdmux)
+ AHOLD = st.ffAholdpol ? st.ffAholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffAholdmux->getPort(ID(S)));
+ else
+ AHOLD = State::S0;
+ if (st.ffBholdmux)
+ BHOLD = st.ffBholdpol ? st.ffBholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffBholdmux->getPort(ID(S)));
+ else
+ BHOLD = State::S0;
+ if (st.ffCDholdmux)
+ CDHOLD = st.ffCDholdpol ? st.ffCDholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffCDholdmux->getPort(ID(S)));
+ else
+ CDHOLD = State::S0;
+ cell->setPort(ID(AHOLD), AHOLD);
+ cell->setPort(ID(BHOLD), BHOLD);
+ cell->setPort(ID(CHOLD), CDHOLD);
+ cell->setPort(ID(DHOLD), CDHOLD);
+
+ SigSpec IRSTTOP, IRSTBOT;
+ if (st.ffArstmux)
+ IRSTTOP = st.ffArstpol ? st.ffArstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffArstmux->getPort(ID(S)));
+ else
+ IRSTTOP = State::S0;
+ if (st.ffBrstmux)
+ IRSTBOT = st.ffBrstpol ? st.ffBrstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffBrstmux->getPort(ID(S)));
+ else
+ IRSTBOT = State::S0;
+ cell->setPort(ID(IRSTTOP), IRSTTOP);
+ cell->setPort(ID(IRSTBOT), IRSTBOT);
- if (st.clock_vld)
+ if (st.clock != SigBit())
{
- cell->setPort("\\CLK", st.clock);
- cell->setPort("\\CE", State::S1);
- cell->setParam("\\NEG_TRIGGER", st.clock_pol ? State::S0 : State::S1);
+ cell->setPort(ID(CLK), st.clock);
+ cell->setPort(ID(CE), State::S1);
+ cell->setParam(ID(NEG_TRIGGER), st.clock_pol ? State::S0 : State::S1);
log(" clock: %s (%s)", log_signal(st.clock), st.clock_pol ? "posedge" : "negedge");
@@ -114,91 +140,137 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
if (st.ffB)
log(" ffB:%s", log_id(st.ffB));
- if (st.ffY)
- log(" ffY:%s", log_id(st.ffY));
+ if (st.ffCD)
+ log(" ffCD:%s", log_id(st.ffCD));
+
+ if (st.ffFJKG)
+ log(" ffFJKG:%s", log_id(st.ffFJKG));
+
+ if (st.ffH)
+ log(" ffH:%s", log_id(st.ffH));
- if (st.ffS)
- log(" ffS:%s", log_id(st.ffS));
+ if (st.ffO)
+ log(" ffO:%s", log_id(st.ffO));
log("\n");
}
else
{
- cell->setPort("\\CLK", State::S0);
- cell->setPort("\\CE", State::S0);
- cell->setParam("\\NEG_TRIGGER", State::S0);
+ cell->setPort(ID(CLK), State::S0);
+ cell->setPort(ID(CE), State::S0);
+ cell->setParam(ID(NEG_TRIGGER), State::S0);
}
// SB_MAC16 Cascade Interface
- cell->setPort("\\SIGNEXTIN", State::Sx);
- cell->setPort("\\SIGNEXTOUT", pm.module->addWire(NEW_ID));
+ cell->setPort(ID(SIGNEXTIN), State::Sx);
+ cell->setPort(ID(SIGNEXTOUT), pm.module->addWire(NEW_ID));
- cell->setPort("\\CI", State::Sx);
- cell->setPort("\\CO", pm.module->addWire(NEW_ID));
+ cell->setPort(ID(CI), State::Sx);
- cell->setPort("\\ACCUMCI", State::Sx);
- cell->setPort("\\ACCUMCO", pm.module->addWire(NEW_ID));
+ cell->setPort(ID(ACCUMCI), State::Sx);
+ cell->setPort(ID(ACCUMCO), pm.module->addWire(NEW_ID));
// SB_MAC16 Output Interface
- SigSpec O = st.ffS ? st.sigS : st.sigY;
+ SigSpec O = st.sigO;
+ int O_width = GetSize(O);
+ if (O_width == 33) {
+ log_assert(st.add);
+ // If we have a signed multiply-add, then perform sign extension
+ if (st.add->getParam(ID(A_SIGNED)).as_bool() && st.add->getParam(ID(B_SIGNED)).as_bool())
+ pm.module->connect(O[32], O[31]);
+ else
+ cell->setPort(ID(CO), O[32]);
+ O.remove(O_width-1);
+ }
+ else
+ cell->setPort(ID(CO), pm.module->addWire(NEW_ID));
+ log_assert(GetSize(O) <= 32);
if (GetSize(O) < 32)
O.append(pm.module->addWire(NEW_ID, 32-GetSize(O)));
- cell->setPort("\\O", O);
-
- if (st.addAB) {
- log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type));
- cell->setPort("\\ADDSUBTOP", st.addAB->type == "$add" ? State::S0 : State::S1);
- cell->setPort("\\ADDSUBBOT", st.addAB->type == "$add" ? State::S0 : State::S1);
+ cell->setPort(ID(O), O);
+
+ bool accum = false;
+ if (st.add) {
+ accum = (st.ffO && st.add->getPort(st.addAB == ID::A ? ID::B : ID::A) == st.sigO);
+ if (accum)
+ log(" accumulator %s (%s)\n", log_id(st.add), log_id(st.add->type));
+ else
+ log(" adder %s (%s)\n", log_id(st.add), log_id(st.add->type));
+ cell->setPort(ID(ADDSUBTOP), st.add->type == ID($add) ? State::S0 : State::S1);
+ cell->setPort(ID(ADDSUBBOT), st.add->type == ID($add) ? State::S0 : State::S1);
} else {
- cell->setPort("\\ADDSUBTOP", State::S0);
- cell->setPort("\\ADDSUBBOT", State::S0);
+ cell->setPort(ID(ADDSUBTOP), State::S0);
+ cell->setPort(ID(ADDSUBBOT), State::S0);
}
- cell->setPort("\\ORSTTOP", State::S0);
- cell->setPort("\\ORSTBOT", State::S0);
+ SigSpec OHOLD;
+ if (st.ffOholdmux)
+ OHOLD = st.ffOholdpol ? st.ffOholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffOholdmux->getPort(ID(S)));
+ else
+ OHOLD = State::S0;
+ cell->setPort(ID(OHOLDTOP), OHOLD);
+ cell->setPort(ID(OHOLDBOT), OHOLD);
- cell->setPort("\\OHOLDTOP", State::S0);
- cell->setPort("\\OHOLDBOT", State::S0);
+ SigSpec ORST;
+ if (st.ffOrstmux)
+ ORST = st.ffOrstpol ? st.ffOrstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffOrstmux->getPort(ID(S)));
+ else
+ ORST = State::S0;
+ cell->setPort(ID(ORSTTOP), ORST);
+ cell->setPort(ID(ORSTBOT), ORST);
SigSpec acc_reset = State::S0;
- if (st.muxA)
- acc_reset = st.muxA->getPort("\\S");
- if (st.muxB)
- acc_reset = pm.module->Not(NEW_ID, st.muxB->getPort("\\S"));
-
- cell->setPort("\\OLOADTOP", acc_reset);
- cell->setPort("\\OLOADBOT", acc_reset);
+ if (st.mux) {
+ if (st.muxAB == ID::A)
+ acc_reset = st.mux->getPort(ID(S));
+ else
+ acc_reset = pm.module->Not(NEW_ID, st.mux->getPort(ID(S)));
+ }
+ cell->setPort(ID(OLOADTOP), acc_reset);
+ cell->setPort(ID(OLOADBOT), acc_reset);
// SB_MAC16 Remaining Parameters
- cell->setParam("\\C_REG", State::S0);
- cell->setParam("\\D_REG", State::S0);
+ cell->setParam(ID(TOP_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0);
+ cell->setParam(ID(BOT_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0);
+ cell->setParam(ID(PIPELINE_16x16_MULT_REG1), st.ffFJKG ? State::S1 : State::S0);
+ cell->setParam(ID(PIPELINE_16x16_MULT_REG2), st.ffH ? State::S1 : State::S0);
- cell->setParam("\\TOP_8x8_MULT_REG", st.ffY ? State::S1 : State::S0);
- cell->setParam("\\BOT_8x8_MULT_REG", st.ffY ? State::S1 : State::S0);
- cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffY ? State::S1 : State::S0);
- cell->setParam("\\PIPELINE_16x16_MULT_REG2", State::S0);
+ cell->setParam(ID(TOPADDSUB_LOWERINPUT), Const(2, 2));
+ cell->setParam(ID(TOPADDSUB_UPPERINPUT), accum ? State::S0 : State::S1);
+ cell->setParam(ID(TOPADDSUB_CARRYSELECT), Const(3, 2));
- cell->setParam("\\TOPOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2));
- cell->setParam("\\TOPADDSUB_LOWERINPUT", Const(2, 2));
- cell->setParam("\\TOPADDSUB_UPPERINPUT", State::S0);
- cell->setParam("\\TOPADDSUB_CARRYSELECT", Const(3, 2));
+ cell->setParam(ID(BOTADDSUB_LOWERINPUT), Const(2, 2));
+ cell->setParam(ID(BOTADDSUB_UPPERINPUT), accum ? State::S0 : State::S1);
+ cell->setParam(ID(BOTADDSUB_CARRYSELECT), Const(0, 2));
- cell->setParam("\\BOTOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2));
- cell->setParam("\\BOTADDSUB_LOWERINPUT", Const(2, 2));
- cell->setParam("\\BOTADDSUB_UPPERINPUT", State::S0);
- cell->setParam("\\BOTADDSUB_CARRYSELECT", Const(0, 2));
+ cell->setParam(ID(MODE_8x8), State::S0);
+ cell->setParam(ID(A_SIGNED), st.mul->getParam(ID(A_SIGNED)).as_bool());
+ cell->setParam(ID(B_SIGNED), st.mul->getParam(ID(B_SIGNED)).as_bool());
- cell->setParam("\\MODE_8x8", State::S0);
- cell->setParam("\\A_SIGNED", mul_signed ? State::S1 : State::S0);
- cell->setParam("\\B_SIGNED", mul_signed ? State::S1 : State::S0);
+ if (st.ffO) {
+ if (st.o_lo)
+ cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
+ else
+ cell->setParam(ID(TOPOUTPUT_SELECT), Const(1, 2));
- pm.autoremove(st.mul);
- pm.autoremove(st.ffY);
- pm.autoremove(st.ffS);
+ st.ffO->connections_.at(ID(Q)).replace(O, pm.module->addWire(NEW_ID, GetSize(O)));
+ cell->setParam(ID(BOTOUTPUT_SELECT), Const(1, 2));
+ }
+ else {
+ cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
+ cell->setParam(ID(BOTOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
+ }
+
+ if (cell != st.mul)
+ pm.autoremove(st.mul);
+ else
+ pm.blacklist(st.mul);
+ pm.autoremove(st.ffFJKG);
+ pm.autoremove(st.add);
}
struct Ice40DspPass : public Pass {
@@ -209,7 +281,17 @@ struct Ice40DspPass : public Pass {
log("\n");
log(" ice40_dsp [options] [selection]\n");
log("\n");
- log("Map multipliers and multiply-accumulate blocks to iCE40 DSP resources.\n");
+ log("Map multipliers ($mul/SB_MAC16) and multiply-accumulate ($mul/SB_MAC16 + $add)\n");
+ log("cells into iCE40 DSP resources.\n");
+ log("Currently, only the 16x16 multiply mode is supported and not the 2 x 8x8 mode.\n");
+ log("\n");
+ log("Pack input registers (A, B, {C,D}; with optional hold), pipeline registers\n");
+ log("({F,J,K,G}, H), output registers (O -- full 32-bits or lower 16-bits only; with\n");
+ log("optional hold), and post-adder into the SB_MAC16 resource.\n");
+ log("\n");
+ log("Multiply-accumulate operations using the post-adder with feedback on the {C,D}\n");
+ log("input will be folded into the DSP. In this scenario only, resetting the\n");
+ log("accumulator to an arbitrary value can be inferred to use the {C,D} input.\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
diff --git a/passes/pmgen/ice40_dsp.pmg b/passes/pmgen/ice40_dsp.pmg
index 7003092bb..6b6d2b56f 100644
--- a/passes/pmgen/ice40_dsp.pmg
+++ b/passes/pmgen/ice40_dsp.pmg
@@ -1,163 +1,574 @@
pattern ice40_dsp
state <SigBit> clock
-state <bool> clock_pol clock_vld
-state <SigSpec> sigA sigB sigY sigS
-state <Cell*> addAB muxAB
+state <bool> clock_pol cd_signed o_lo
+state <SigSpec> sigA sigB sigCD sigH sigO
+state <Cell*> add mux
+state <IdString> addAB muxAB
+
+state <bool> ffAholdpol ffBholdpol ffCDholdpol ffOholdpol
+state <bool> ffArstpol ffBrstpol ffCDrstpol ffOrstpol
+
+state <Cell*> ffA ffAholdmux ffArstmux ffB ffBholdmux ffBrstmux ffCD ffCDholdmux
+state <Cell*> ffFJKG ffH ffO ffOholdmux ffOrstmux
+
+// subpattern
+state <SigSpec> argQ argD
+state <bool> ffholdpol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffholdmux dffrstmux
+udata <bool> dffholdpol dffrstpol dffclock_pol
match mul
- select mul->type.in($mul)
+ select mul->type.in($mul, \SB_MAC16)
select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10
- select GetSize(mul->getPort(\Y)) > 10
endmatch
-match ffA
- select ffA->type.in($dff)
- // select nusers(port(ffA, \Q)) == 2
- index <SigSpec> port(ffA, \Q) === port(mul, \A)
- optional
-endmatch
+code sigA sigB sigH
+ auto unextend = [](const SigSpec &sig) {
+ int i;
+ for (i = GetSize(sig)-1; i > 0; i--)
+ if (sig[i] != sig[i-1])
+ break;
+ // Do not remove non-const sign bit
+ if (sig[i].wire)
+ ++i;
+ return sig.extract(0, i);
+ };
+ sigA = unextend(port(mul, \A));
+ sigB = unextend(port(mul, \B));
-code sigA clock clock_pol clock_vld
- sigA = port(mul, \A);
+ SigSpec O;
+ if (mul->type == $mul)
+ O = mul->getPort(\Y);
+ else if (mul->type == \SB_MAC16)
+ O = mul->getPort(\O);
+ else log_abort();
+ if (GetSize(O) <= 10)
+ reject;
- if (ffA) {
- sigA = port(ffA, \D);
+ // Only care about those bits that are used
+ int i;
+ for (i = 0; i < GetSize(O); i++) {
+ if (nusers(O[i]) <= 1)
+ break;
+ sigH.append(O[i]);
+ }
+ log_assert(nusers(O.extract_end(i)) <= 1);
+endcode
- clock = port(ffA, \CLK).as_bit();
- clock_pol = param(ffA, \CLK_POLARITY).as_bool();
- clock_vld = true;
+// Try to pack a register on the A input. Skipped when mul is an SB_MAC16
+// whose A_REG parameter is already set. The in_dffe subpattern looks for a
+// flop whose Q drives argQ and reports it through the dff* udata variables.
+code argQ ffA ffAholdmux ffArstmux ffAholdpol ffArstpol sigA clock clock_pol
+ if (mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()) {
+ argQ = sigA;
+ subpattern(in_dffe);
+ if (dff) {
+ ffA = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ if (dffrstmux) {
+ ffArstmux = dffrstmux;
+ ffArstpol = dffrstpol;
+ }
+ if (dffholdmux) {
+ ffAholdmux = dffholdmux;
+ ffAholdpol = dffholdpol;
+ }
+ // From here on sigA is the signal in front of the packed register.
+ sigA = dffD;
+ }
}
endcode
-match ffB
- select ffB->type.in($dff)
- // select nusers(port(ffB, \Q)) == 2
- index <SigSpec> port(ffB, \Q) === port(mul, \B)
- optional
-endmatch
+// Try to pack a register on the B input; mirrors the A-input step. Skipped
+// when mul is an SB_MAC16 whose B_REG parameter is already set.
+code argQ ffB ffBholdmux ffBrstmux ffBholdpol ffBrstpol sigB clock clock_pol
+ if (mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()) {
+ argQ = sigB;
+ subpattern(in_dffe);
+ if (dff) {
+ ffB = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ if (dffrstmux) {
+ ffBrstmux = dffrstmux;
+ ffBrstpol = dffrstpol;
+ }
+ if (dffholdmux) {
+ ffBholdmux = dffholdmux;
+ ffBholdpol = dffholdpol;
+ }
+ // From here on sigB is the signal in front of the packed register.
+ sigB = dffD;
+ }
+ }
+endcode
-code sigB clock clock_pol clock_vld
- sigB = port(mul, \B);
+code argD ffFJKG sigH clock clock_pol
+ if (nusers(sigH) == 2 &&
+ (mul->type != \SB_MAC16 ||
+ (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool()))) {
+ argD = sigH;
+ subpattern(out_dffe);
+ if (dff) {
+ // F/J/K/G do not have a CE-like (hold) input
+ if (dffholdmux)
+ goto reject_ffFJKG;
- if (ffB) {
- sigB = port(ffB, \D);
- SigBit c = port(ffB, \CLK).as_bit();
- bool cp = param(ffB, \CLK_POLARITY).as_bool();
+ // Reset signal of F/J (IRSTTOP) and K/G (IRSTBOT)
+ // shared with A and B
+ if ((ffArstmux != NULL) != (dffrstmux != NULL))
+ goto reject_ffFJKG;
+ if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+ goto reject_ffFJKG;
+ if (ffArstmux) {
+ if (port(ffArstmux, \S) != port(dffrstmux, \S))
+ goto reject_ffFJKG;
+ if (ffArstpol != dffrstpol)
+ goto reject_ffFJKG;
+ }
+ if (ffBrstmux) {
+ if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+ goto reject_ffFJKG;
+ if (ffBrstpol != dffrstpol)
+ goto reject_ffFJKG;
+ }
- if (clock_vld && (c != clock || cp != clock_pol))
- reject;
+ ffFJKG = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ sigH = dffQ;
+
+reject_ffFJKG: ;
+ }
+ }
+endcode
+
+code argD ffH sigH sigO clock clock_pol
+ if (ffFJKG && nusers(sigH) == 2 &&
+ (mul->type != \SB_MAC16 || !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool())) {
+ argD = sigH;
+ subpattern(out_dffe);
+ if (dff) {
+ // H does not have a CE-like (hold) input
+ if (dffholdmux)
+ goto reject_ffH;
+
+ // Reset signal of H (IRSTBOT) shared with B
+ if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+ goto reject_ffH;
+ if (ffBrstmux) {
+ if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+ goto reject_ffH;
+ if (ffBrstpol != dffrstpol)
+ goto reject_ffH;
+ }
- clock = c;
- clock_pol = cp;
- clock_vld = true;
+ ffH = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ sigH = dffQ;
+
+reject_ffH: ;
+ }
}
+
+ sigO = sigH;
endcode
-match ffY
- select ffY->type.in($dff)
- select nusers(port(ffY, \D)) == 2
- index <SigSpec> port(ffY, \D) === port(mul, \Y)
+// Optional post-adder: a $add with one input (A or B, recorded in addAB) fed
+// by the low bits of sigH.
+match add
+ // For an existing SB_MAC16 only when both output selects are 3 (the value
+ // create_ice40_dsp() writes when neither an adder nor an output register
+ // is packed).
+ if mul->type != \SB_MAC16 || (param(mul, \TOPOUTPUT_SELECT).as_int() == 3 && param(mul, \BOTOUTPUT_SELECT).as_int() == 3)
+
+ select add->type.in($add)
+ choice <IdString> AB {\A, \B}
+ select nusers(port(add, AB)) == 2
+
+ // The adder input may be narrower than sigH: it must equal the low bits of
+ // sigH, and the remaining upper bits of sigH must have at most one user.
+ index <SigBit> port(add, AB)[0] === sigH[0]
+ filter GetSize(port(add, AB)) <= GetSize(sigH)
+ filter port(add, AB) == sigH.extract(0, GetSize(port(add, AB)))
+ filter nusers(sigH.extract_end(GetSize(port(add, AB)))) <= 1
+ set addAB AB
optional
endmatch
-code sigY clock clock_pol clock_vld
- sigY = port(mul, \Y);
+code sigCD sigO cd_signed
+ if (add) {
+ sigCD = port(add, addAB == \A ? \B : \A);
+ cd_signed = param(add, addAB == \A ? \B_SIGNED : \A_SIGNED).as_bool();
- if (ffY) {
- sigY = port(ffY, \Q);
- SigBit c = port(ffY, \CLK).as_bit();
- bool cp = param(ffY, \CLK_POLARITY).as_bool();
+ int natural_mul_width = GetSize(sigA) + GetSize(sigB);
+ int actual_mul_width = GetSize(sigH);
+ int actual_acc_width = GetSize(sigCD);
- if (clock_vld && (c != clock || cp != clock_pol))
+ if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+ reject;
+ // If accumulator, check adder width and signedness
+ if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(add, \A_SIGNED).as_bool()))
reject;
- clock = c;
- clock_pol = cp;
- clock_vld = true;
+ sigO = port(add, \Y);
}
endcode
-match addA
- select addA->type.in($add)
- select nusers(port(addA, \A)) == 2
- index <SigSpec> port(addA, \A) === sigY
+// Optional $mux with one data input (A or B, recorded in muxAB) that is
+// exactly sigO and has two users.
+match mux
+ select mux->type == $mux
+ choice <IdString> AB {\A, \B}
+ select nusers(port(mux, AB)) == 2
+ index <SigSpec> port(mux, AB) === sigO
+ set muxAB AB
optional
endmatch
-match addB
- if !addA
- select addB->type.in($add, $sub)
- select nusers(port(addB, \B)) == 2
- index <SigSpec> port(addB, \B) === sigY
- optional
-endmatch
+// If a mux was matched, the output under consideration moves to its Y port.
+code sigO
+ if (mux)
+ sigO = port(mux, \Y);
+endcode
+
+code argD ffO ffOholdmux ffOrstmux ffOholdpol ffOrstpol sigO sigCD clock clock_pol cd_signed o_lo
+ if (mul->type != \SB_MAC16 ||
+ // Ensure that register is not already used
+ ((param(mul, \TOPOUTPUT_SELECT, 0).as_int() != 1 && param(mul, \BOTOUTPUT_SELECT, 0).as_int() != 1) &&
+ // Ensure that OLOADTOP/OLOADBOT is unused or zero
+ (port(mul, \OLOADTOP, State::S0).is_fully_zero() && port(mul, \OLOADBOT, State::S0).is_fully_zero()))) {
+
+ dff = nullptr;
+
+ // First try entire sigO
+ if (nusers(sigO) == 2) {
+ argD = sigO;
+ subpattern(out_dffe);
+ }
+
+ // Otherwise try just its least significant 16 bits
+ if (!dff && GetSize(sigO) > 16) {
+ argD = sigO.extract(0, 16);
+ if (nusers(argD) == 2) {
+ subpattern(out_dffe);
+ o_lo = dff;
+ }
+ }
+
+ if (dff) {
+ ffO = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ if (dffrstmux) {
+ ffOrstmux = dffrstmux;
+ ffOrstpol = dffrstpol;
+ }
+ if (dffholdmux) {
+ ffOholdmux = dffholdmux;
+ ffOholdpol = dffholdpol;
+ }
+
+ sigO.replace(sigO.extract(0, GetSize(dffQ)), dffQ);
+ }
-code addAB sigS
- if (addA) {
- addAB = addA;
- sigS = port(addA, \B);
+ // Loading value into output register is not
+ // supported unless using accumulator
+ if (mux) {
+ if (sigCD != sigO)
+ reject;
+ sigCD = port(mux, muxAB == \B ? \A : \B);
+
+ cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool();
+ }
}
- if (addB) {
- addAB = addB;
- sigS = port(addB, \A);
+endcode
+
+code argQ ffCD ffCDholdmux ffCDholdpol ffCDrstpol sigCD clock clock_pol
+ if (!sigCD.empty() && sigCD != sigO &&
+ (mul->type != \SB_MAC16 || (!param(mul, \C_REG).as_bool() && !param(mul, \D_REG).as_bool()))) {
+ argQ = sigCD;
+ subpattern(in_dffe);
+ if (dff) {
+ if (dffholdmux) {
+ ffCDholdmux = dffholdmux;
+ ffCDholdpol = dffholdpol;
+ }
+
+ // Reset signal of C (IRSTTOP) and D (IRSTBOT)
+ // shared with A and B
+ if ((ffArstmux != NULL) != (dffrstmux != NULL))
+ goto reject_ffCD;
+ if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+ goto reject_ffCD;
+ if (ffArstmux) {
+ if (port(ffArstmux, \S) != port(dffrstmux, \S))
+ goto reject_ffCD;
+ if (ffArstpol != dffrstpol)
+ goto reject_ffCD;
+ }
+ if (ffBrstmux) {
+ if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+ goto reject_ffCD;
+ if (ffBrstpol != dffrstpol)
+ goto reject_ffCD;
+ }
+
+ ffCD = dff;
+ clock = dffclock;
+ clock_pol = dffclock_pol;
+ sigCD = dffD;
+
+reject_ffCD: ;
+ }
}
- if (addAB) {
- int natural_mul_width = GetSize(sigA) + GetSize(sigB);
- int actual_mul_width = GetSize(sigY);
- int actual_acc_width = GetSize(sigS);
+endcode
- if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+// Bring sigCD to 32 bits (create_ice40_dsp() asserts this width when sigCD is
+// non-empty).
+// NOTE(review): presumably cd_signed selects sign- versus zero-extension --
+// confirm against SigSpec::extend_u0.
+code sigCD
+ sigCD.extend_u0(32, cd_signed);
+endcode
+
+// Last step of the main ice40_dsp pattern (the subpatterns follow below).
+// NOTE(review): presumably `accept` reports the match to the caller's
+// callback -- confirm in the pmgen README.
+code
+ accept;
+endcode
+
+// #######################
+
+subpattern in_dffe
+arg argD argQ clock clock_pol
+
+code
+ dff = nullptr;
+ for (auto c : argQ.chunks()) {
+ if (!c.wire)
+ reject;
+ if (c.wire->get_bool_attribute(\keep))
reject;
- if ((actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool()))
+ Const init = c.wire->attributes.at(\init, State::Sx);
+ if (!init.is_fully_undef() && !init.is_fully_zero())
reject;
}
endcode
-match muxA
- if addAB
- select muxA->type.in($mux)
- select nusers(port(muxA, \A)) == 2
- index <SigSpec> port(muxA, \A) === port(addAB, \Y)
- optional
+// Find a $dff whose Q output contains argQ as a contiguous slice starting at
+// bit `offset`.
+match ff
+ select ff->type.in($dff)
+ // Only flops with CLK_POLARITY set are accepted here.
+ // NOTE(review): this remark previously cited DSP48E1, which is not the
+ // SB_MAC16 this pattern targets; create_ice40_dsp() derives NEG_TRIGGER
+ // from clock_pol, so confirm whether this restriction is intended for iCE40.
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+ // Check that the rest of argQ is present
+ filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ set ffoffset offset
endmatch
-match muxB
- if addAB
- if !muxA
- select muxB->type.in($mux)
- select nusers(port(muxB, \B)) == 2
- index <SigSpec> port(muxB, \B) === port(addAB, \Y)
- optional
+// Record the matched flop in the dff* udata variables and set up argD/argQ
+// for the mux matches that follow.
+code argQ argD
+{
+ // If a clock was already chosen, the flop must use the same clock signal
+ // and polarity.
+ if (clock != SigBit()) {
+ if (port(ff, \CLK) != clock)
+ reject;
+ if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
+ reject;
+ }
+
+ SigSpec Q = port(ff, \Q);
+ dff = ff;
+ dffclock = port(ff, \CLK);
+ dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
+ // dffD starts as the requested Q bits; the replace() below rewrites them
+ // in terms of the flop's full Q -> D mapping.
+ dffD = argQ;
+ argD = port(ff, \D);
+ argQ = Q;
+ dffD.replace(argQ, argD);
+ // Only search for ffrstmux if dffD only
+ // has two (ff, ffrstmux) users
+ if (nusers(dffD) > 2)
+ argD = SigSpec();
+}
+endcode
+
+// Reset mux in front of the flop's D input: a $mux whose Y equals argD and
+// one of whose data inputs is all-zero. Currently switched off by the
+// `if false` condition below.
+match ffrstmux
+ if false /* TODO: ice40 resets are actually async */
+
+ if !argD.empty()
+ select ffrstmux->type.in($mux)
+ index <SigSpec> port(ffrstmux, \Y) === argD
+
+ // BA names the mux input that carries the constant reset value
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ // NOTE(review): DSP48E1 is a Xilinx primitive; this comment looks
+ // inherited from the Xilinx pattern -- confirm for iCE40
+ select port(ffrstmux, BA).is_fully_zero()
+
+ // pol is true when the zero input is B
+ define <bool> pol (BA == \B)
+ set ffrstpol pol
+ semioptional
endmatch
-code muxAB
- muxAB = addAB;
- if (muxA)
- muxAB = muxA;
- if (muxB)
- muxAB = muxB;
+code argD
+ // If a reset mux was found, record it and step argD/dffD back through it
+ // to the mux's non-reset data input; otherwise clear dffrstmux
+ if (ffrstmux) {
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ argD = port(ffrstmux, ffrstpol ? \A : \B);
+ dffD.replace(port(ffrstmux, \Y), argD);
+
+ // Only search for ffholdmux if argQ has at
+ // least 3 users (ff, <upstream>, ffrstmux) and
+ // dffD only has two (ff, ffrstmux)
+ if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+ argD = SigSpec();
+ }
+ else
+ dffrstmux = nullptr;
endcode
-match ffS
- if muxAB
- select ffS->type.in($dff)
- select nusers(port(ffS, \D)) == 2
- index <SigSpec> port(ffS, \D) === port(muxAB, \Y)
- index <SigSpec> port(ffS, \Q) === sigS
+// Hold (clock-enable) mux: a $mux whose Y equals argD and one of whose data
+// inputs is fed back from argQ
+match ffholdmux
+ if !argD.empty()
+ select ffholdmux->type.in($mux)
+ index <SigSpec> port(ffholdmux, \Y) === argD
+ // BA names the mux input connected to the fed-back argQ
+ choice <IdString> BA {\B, \A}
+ index <SigSpec> port(ffholdmux, BA) === argQ
+ // pol is true when the feedback input is B
+ define <bool> pol (BA == \B)
+ set ffholdpol pol
+ semioptional
endmatch
-code clock clock_pol clock_vld
- if (ffS) {
- SigBit c = port(ffS, \CLK).as_bit();
- bool cp = param(ffS, \CLK_POLARITY).as_bool();
+code argD
+ // If a hold mux was found, record it and step argD/dffD back through it
+ // to the mux's non-feedback data input; otherwise clear dffholdmux
+ if (ffholdmux) {
+ dffholdmux = ffholdmux;
+ dffholdpol = ffholdpol;
+ argD = port(ffholdmux, ffholdpol ? \A : \B);
+ dffD.replace(port(ffholdmux, \Y), argD);
+ }
+ else
+ dffholdmux = nullptr;
+endcode
- if (clock_vld && (c != clock || cp != clock_pol))
+// #######################
+
+subpattern out_dffe
+arg argD argQ clock clock_pol
+
+code
+ // Start this attempt with no flop recorded
+ dff = nullptr;
+ // Reject when any chunk of argD lives on a wire carrying the keep
+ // attribute.
+ // NOTE(review): unlike the equivalent loop in in_dffe, c.wire is not
+ // checked for null here; a constant chunk in argD would dereference a
+ // null pointer -- confirm argD can never contain constants
+ for (auto c : argD.chunks())
+ if (c.wire->get_bool_attribute(\keep))
reject;
+endcode
- clock = c;
- clock_pol = cp;
- clock_vld = true;
+// Hold (clock-enable) mux downstream of argD: a $mux that takes argD on one
+// data input (AB) and a keep-last-value net on the other (BA)
+match ffholdmux
+ select ffholdmux->type.in($mux)
+ // ffholdmux output must have two users: ffholdmux and ff.D
+ select nusers(port(ffholdmux, \Y)) == 2
+
+ choice <IdString> BA {\B, \A}
+ // keep-last-value net must have at least three users: ffholdmux, ff, downstream sink(s)
+ select nusers(port(ffholdmux, BA)) >= 3
+
+ // AB is the opposite input from BA; argD must start at bit `offset` of it
+ slice offset GetSize(port(ffholdmux, \Y))
+ define <IdString> AB (BA == \B ? \A : \B)
+ index <SigBit> port(ffholdmux, AB)[offset] === argD[0]
+
+ // Check that the rest of argD is present
+ filter GetSize(port(ffholdmux, AB)) >= offset + GetSize(argD)
+ filter port(ffholdmux, AB).extract(offset, GetSize(argD)) == argD
+
+ set ffoffset offset
+ // pol is true when the keep-last-value input is B
+ define <bool> pol (BA == \B)
+ set ffholdpol pol
+
+ semioptional
+endmatch
+
+code argD argQ
+ // Record the hold mux (nullptr when none matched)
+ dffholdmux = ffholdmux;
+ if (ffholdmux) {
+ // AB is the data input fed by argD, Y the mux output
+ SigSpec AB = port(ffholdmux, ffholdpol ? \A : \B);
+ SigSpec Y = port(ffholdmux, \Y);
+ // argQ: argD's bits mapped onto the mux's other (feedback) input;
+ // argD: advanced to the corresponding bits of the mux output
+ argQ = argD;
+ argD.replace(AB, Y);
+ argQ.replace(AB, port(ffholdmux, ffholdpol ? \B : \A));
+
+ // (dffholdmux was already assigned above; this repeats it)
+ dffholdmux = ffholdmux;
+ dffholdpol = ffholdpol;
}
- accept;
+endcode
+
+// Reset mux downstream of argD: a $mux that takes argD on one data input
+// (AB) and an all-zero constant on the other (BA). Currently switched off
+// by the `if false` condition below.
+match ffrstmux
+ if false /* TODO: ice40 resets are actually async */
+
+ select ffrstmux->type.in($mux)
+ // ffrstmux output must have two users: ffrstmux and ff.D
+ select nusers(port(ffrstmux, \Y)) == 2
+
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ // NOTE(review): DSP48E1 is a Xilinx primitive; this comment looks
+ // inherited from the Xilinx pattern -- confirm for iCE40
+ select port(ffrstmux, BA).is_fully_zero()
+
+ slice offset GetSize(port(ffrstmux, \Y))
+ define <IdString> AB (BA == \B ? \A : \B)
+ index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
+
+ // Check that offset is consistent
+ filter !ffholdmux || ffoffset == offset
+ // Check that the rest of argD is present
+ filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD)
+ filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
+
+ set ffoffset offset
+ // pol is true when the data input is A, i.e. the zero input is B
+ define <bool> pol (AB == \A)
+ set ffrstpol pol
+
+ semioptional
+endmatch
+
+code argD argQ
+ // Record the reset mux (nullptr when none matched)
+ dffrstmux = ffrstmux;
+ if (ffrstmux) {
+ // Advance argD from the mux's data input to the matching output bits
+ SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B);
+ SigSpec Y = port(ffrstmux, \Y);
+ argD.replace(AB, Y);
+
+ // (dffrstmux was already assigned above; this repeats it)
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ }
+endcode
+
+// Find the $dff whose D port contains argD, starting at bit position `offset`
+match ff
+ select ff->type.in($dff)
+ // Only flops with CLK_POLARITY set are accepted.
+ // NOTE(review): the rationale given here was "DSP48E1 does not support
+ // clock inversion", but DSP48E1 is a Xilinx primitive and this is the
+ // iCE40 pattern -- presumably copied over; confirm the iCE40 reason
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \D)[offset] === argD[0]
+
+ // Check that offset is consistent
+ filter (!ffholdmux && !ffrstmux) || ffoffset == offset
+ // Check that the rest of argD is present
+ filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
+ filter port(ff, \D).extract(offset, GetSize(argD)) == argD
+ // Check that FF.Q is connected to CE-mux
+ filter !ffholdmux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ set ffoffset offset
+endmatch
+
+code argQ
+ if (ff) {
+ // When the caller already fixed a clock (clock != SigBit()), the
+ // flop must use the same clock signal and the same polarity
+ if (clock != SigBit()) {
+ if (port(ff, \CLK) != clock)
+ reject;
+ if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
+ reject;
+ }
+ SigSpec D = port(ff, \D);
+ SigSpec Q = port(ff, \Q);
+ // Without a hold mux argQ has not been derived yet: map argD's bits
+ // from the flop's D port onto its Q port
+ if (!ffholdmux) {
+ argQ = argD;
+ argQ.replace(D, Q);
+ }
+
+ // Reject when a wire under argQ has an init attribute that is
+ // neither fully undefined nor fully zero
+ for (auto c : argQ.chunks()) {
+ Const init = c.wire->attributes.at(\init, State::Sx);
+ if (!init.is_fully_undef() && !init.is_fully_zero())
+ reject;
+ }
+
+ // Publish the flop, its output and its clocking
+ dff = ff;
+ dffQ = argQ;
+ dffclock = port(ff, \CLK);
+ dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
+ }
+ // No enable/reset mux possible without flop
+ else if (dffholdmux || dffrstmux)
+ reject;
endcode
diff --git a/passes/pmgen/ice40_wrapcarry.pmg b/passes/pmgen/ice40_wrapcarry.pmg
index 9e64c7467..bb59edb0c 100644
--- a/passes/pmgen/ice40_wrapcarry.pmg
+++ b/passes/pmgen/ice40_wrapcarry.pmg
@@ -9,3 +9,7 @@ match lut
index <SigSpec> port(lut, \I1) === port(carry, \I0)
index <SigSpec> port(lut, \I2) === port(carry, \I1)
endmatch
+
+code
+ // No extra conditions in this block: report whatever the preceding
+ // match blocks found
+ accept;
+endcode
diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc
index e7f95cf85..2230145df 100644
--- a/passes/pmgen/peepopt.cc
+++ b/passes/pmgen/peepopt.cc
@@ -24,8 +24,11 @@ USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
bool did_something;
+dict<SigBit, State> initbits;
+pool<SigBit> rminitbits;
#include "passes/pmgen/peepopt_pm.h"
+#include "generate.h"
struct PeepoptPass : public Pass {
PeepoptPass() : Pass("peepopt", "collection of peephole optimizers") { }
@@ -40,26 +43,86 @@ struct PeepoptPass : public Pass {
}
void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
{
+ std::string genmode;
+
log_header(design, "Executing PEEPOPT pass (run peephole optimizers).\n");
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
- // if (args[argidx] == "-singleton") {
- // singleton_mode = true;
- // continue;
- // }
+ if (args[argidx] == "-generate" && argidx+1 < args.size()) {
+ genmode = args[++argidx];
+ continue;
+ }
break;
}
extra_args(args, argidx, design);
- for (auto module : design->selected_modules()) {
+ if (!genmode.empty())
+ {
+ initbits.clear();
+ rminitbits.clear();
+
+ if (genmode == "shiftmul")
+ GENERATE_PATTERN(peepopt_pm, shiftmul);
+ else if (genmode == "muldiv")
+ GENERATE_PATTERN(peepopt_pm, muldiv);
+ else if (genmode == "dffmux")
+ GENERATE_PATTERN(peepopt_pm, dffmux);
+ else
+ log_abort();
+ return;
+ }
+
+ for (auto module : design->selected_modules())
+ {
did_something = true;
- while (did_something) {
+
+ while (did_something)
+ {
did_something = false;
- peepopt_pm pm(module, module->selected_cells());
+ initbits.clear();
+ rminitbits.clear();
+
+ peepopt_pm pm(module);
+
+ for (auto w : module->wires()) {
+ auto it = w->attributes.find(ID(init));
+ if (it != w->attributes.end()) {
+ SigSpec sig = pm.sigmap(w);
+ Const val = it->second;
+ int len = std::min(GetSize(sig), GetSize(val));
+ for (int i = 0; i < len; i++) {
+ if (sig[i].wire == nullptr)
+ continue;
+ if (val[i] != State::S0 && val[i] != State::S1)
+ continue;
+ initbits[sig[i]] = val[i];
+ }
+ }
+ }
+
+ pm.setup(module->selected_cells());
+
pm.run_shiftmul();
pm.run_muldiv();
+ pm.run_dffmux();
+
+ for (auto w : module->wires()) {
+ auto it = w->attributes.find(ID(init));
+ if (it != w->attributes.end()) {
+ SigSpec sig = pm.sigmap(w);
+ Const &val = it->second;
+ int len = std::min(GetSize(sig), GetSize(val));
+ for (int i = 0; i < len; i++) {
+ if (rminitbits.count(sig[i]))
+ val[i] = State::Sx;
+ }
+ }
+ }
+
+ initbits.clear();
+ rminitbits.clear();
}
}
}
diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg
new file mode 100644
index 000000000..0069b0570
--- /dev/null
+++ b/passes/pmgen/peepopt_dffmux.pmg
@@ -0,0 +1,171 @@
+pattern dffmux
+
+state <IdString> cemuxAB rstmuxBA
+state <SigSpec> sigD
+
+// Anchor of the pattern: any $dff whose D port is wider than one bit
+match dff
+ select dff->type == $dff
+ select GetSize(port(dff, \D)) > 1
+endmatch
+
+code sigD
+ // sigD tracks the signal whose driver is searched for next; it starts
+ // at the flop's D input
+ sigD = port(dff, \D);
+endcode
+
+// Reset mux: a multi-bit $mux driving sigD with a fully constant value on
+// one of its data inputs
+match rstmux
+ select rstmux->type == $mux
+ select GetSize(port(rstmux, \Y)) > 1
+ index <SigSpec> port(rstmux, \Y) === sigD
+ // BA names the input that carries the constant
+ choice <IdString> BA {\B, \A}
+ select port(rstmux, BA).is_fully_const()
+ set rstmuxBA BA
+ semioptional
+endmatch
+
+code sigD
+ // With a reset mux present, continue the search from its non-constant
+ // data input
+ if (rstmux)
+ sigD = port(rstmux, rstmuxBA == \B ? \A : \B);
+endcode
+
+// Clock-enable mux: a multi-bit $mux driving sigD with the flop's own Q fed
+// back into one of its data inputs
+match cemux
+ select cemux->type == $mux
+ select GetSize(port(cemux, \Y)) > 1
+ index <SigSpec> port(cemux, \Y) === sigD
+ // AB names the input that receives the Q feedback
+ choice <IdString> AB {\A, \B}
+ index <SigSpec> port(cemux, AB) === port(dff, \Q)
+ set cemuxAB AB
+ semioptional
+endmatch
+
+code
+ // Both muxes are semioptional, but at least one of them must be present
+ if (!cemux && !rstmux)
+ reject;
+endcode
+
+code
+ // Narrow the flop and its mux(es) in two steps:
+ //  1. drop upper bits that merely repeat the bit below them
+ //  2. (cemux only) drop bits whose data input is a constant compatible
+ //     with the reset and init values
+ // Q bits that lose their flop are connected to an equivalent signal and
+ // queued in rminitbits.
+
+ // D: the real data input behind the mux(es); rst: the constant on the
+ // reset mux, or all-x when there is no reset mux
+ Const rst;
+ SigSpec D;
+ if (cemux) {
+ D = port(cemux, cemuxAB == \A ? \B : \A);
+ if (rstmux)
+ rst = port(rstmux, rstmuxBA).as_const();
+ else
+ rst = Const(State::Sx, GetSize(D));
+ }
+ else {
+ log_assert(rstmux);
+ D = port(rstmux, rstmuxBA == \B ? \A : \B);
+ rst = port(rstmux, rstmuxBA).as_const();
+ }
+ SigSpec Q = port(dff, \Q);
+ int width = GetSize(D);
+
+ // Raw (not sigmapped) flop ports; these local copies are edited below
+ // and written back with setPort()
+ SigSpec dffD = dff->getPort(\D);
+ SigSpec dffQ = dff->getPort(\Q);
+
+ // Per-bit init value of Q as found in initbits; x when not recorded
+ Const initval;
+ for (auto b : Q) {
+ auto it = initbits.find(b);
+ initval.bits.push_back(it == initbits.end() ? State::Sx : it->second);
+ }
+
+ // Equality that treats x on either side as matching anything
+ auto cmpx = [=](State lhs, State rhs) {
+ if (lhs == State::Sx || rhs == State::Sx)
+ return true;
+ return lhs == rhs;
+ };
+
+ // Step 1: walk down from the top bit while bit i has the same data
+ // input as bit i-1 and compatible reset/init values; each such Q[i] is
+ // tied to Q[i-1]. The loop condition stops the walk before i reaches 1.
+ int i = width-1;
+ while (i > 1) {
+ if (D[i] != D[i-1])
+ break;
+ if (!cmpx(rst[i], rst[i-1]))
+ break;
+ if (!cmpx(initval[i], initval[i-1]))
+ break;
+ if (!cmpx(rst[i], initval[i]))
+ break;
+ rminitbits.insert(Q[i]);
+ module->connect(Q[i], Q[i-1]);
+ i--;
+ }
+ // If anything was merged, remove width-1-i bits starting at index i
+ // from every port of the mux(es) and the flop
+ if (i < width-1) {
+ did_something = true;
+ if (cemux) {
+ SigSpec ceA = cemux->getPort(\A);
+ SigSpec ceB = cemux->getPort(\B);
+ SigSpec ceY = cemux->getPort(\Y);
+ ceA.remove(i, width-1-i);
+ ceB.remove(i, width-1-i);
+ ceY.remove(i, width-1-i);
+ cemux->setPort(\A, ceA);
+ cemux->setPort(\B, ceB);
+ cemux->setPort(\Y, ceY);
+ cemux->fixup_parameters();
+ blacklist(cemux);
+ }
+ if (rstmux) {
+ SigSpec rstA = rstmux->getPort(\A);
+ SigSpec rstB = rstmux->getPort(\B);
+ SigSpec rstY = rstmux->getPort(\Y);
+ rstA.remove(i, width-1-i);
+ rstB.remove(i, width-1-i);
+ rstY.remove(i, width-1-i);
+ rstmux->setPort(\A, rstA);
+ rstmux->setPort(\B, rstB);
+ rstmux->setPort(\Y, rstY);
+ rstmux->fixup_parameters();
+ blacklist(rstmux);
+ }
+ dffD.remove(i, width-1-i);
+ dffQ.remove(i, width-1-i);
+ dff->setPort(\D, dffD);
+ dff->setPort(\Q, dffQ);
+ dff->fixup_parameters();
+ blacklist(dff);
+
+ log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i);
+ width = i+1;
+ }
+ // Step 2: with a clock-enable mux, a bit whose data input is a constant
+ // compatible with both its reset and init value is tied to that
+ // constant and removed from the cemux and the flop.
+ // NOTE(review): rstmux's ports are not narrowed in this step -- confirm
+ // that leaving it at its previous width is intended
+ if (cemux) {
+ SigSpec ceA = cemux->getPort(\A);
+ SigSpec ceB = cemux->getPort(\B);
+ SigSpec ceY = cemux->getPort(\Y);
+
+ int count = 0;
+ // (this i shadows the outer i; iterating downwards keeps the lower
+ // indices valid while single bits are removed)
+ for (int i = width-1; i >= 0; i--) {
+ if (D[i].wire)
+ continue;
+ if (cmpx(rst[i], D[i].data) && cmpx(initval[i], D[i].data)) {
+ count++;
+ rminitbits.insert(Q[i]);
+ module->connect(Q[i], D[i]);
+ ceA.remove(i);
+ ceB.remove(i);
+ ceY.remove(i);
+ dffD.remove(i);
+ dffQ.remove(i);
+ }
+ }
+ if (count > 0)
+ {
+ did_something = true;
+
+ cemux->setPort(\A, ceA);
+ cemux->setPort(\B, ceB);
+ cemux->setPort(\Y, ceY);
+ cemux->fixup_parameters();
+ blacklist(cemux);
+
+ dff->setPort(\D, dffD);
+ dff->setPort(\Q, dffQ);
+ dff->fixup_parameters();
+ blacklist(dff);
+
+ log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count);
+ }
+ }
+
+ // did_something is a flag shared with the calling pass; accept only
+ // when it is set
+ if (did_something)
+ accept;
+endcode
diff --git a/passes/pmgen/pmgen.py b/passes/pmgen/pmgen.py
index 573722d68..df0ffaff2 100644
--- a/passes/pmgen/pmgen.py
+++ b/passes/pmgen/pmgen.py
@@ -286,7 +286,7 @@ def process_pmgfile(f, filename):
block["gencode"].append(rewrite_cpp(l.rstrip()))
break
- assert False
+ raise RuntimeError("'%s' statement not recognised on line %d" % (a[0], linenr))
if block["optional"]:
assert not block["semioptional"]
@@ -305,7 +305,8 @@ def process_pmgfile(f, filename):
block["states"] = set()
for s in line.split()[1:]:
- assert s in state_types[current_pattern]
+ if s not in state_types[current_pattern]:
+ raise RuntimeError("'%s' not in state_types" % s)
block["states"].add(s)
codetype = "code"
@@ -327,7 +328,7 @@ def process_pmgfile(f, filename):
blocks.append(block)
continue
- assert False
+ raise RuntimeError("'%s' command not recognised" % cmd)
for fn in pmgfiles:
with open(fn, "r") as f:
@@ -361,6 +362,7 @@ with open(outfile, "w") as f:
print(" Module *module;", file=f)
print(" SigMap sigmap;", file=f)
print(" std::function<void()> on_accept;", file=f)
+ print(" bool setup_done;", file=f)
print(" bool generate_mode;", file=f)
print(" int accept_cnt;", file=f)
print("", file=f)
@@ -452,11 +454,19 @@ with open(outfile, "w") as f:
print(" return sigmap(cell->getPort(portname));", file=f)
print(" }", file=f)
print("", file=f)
+ print(" SigSpec port(Cell *cell, IdString portname, const SigSpec& defval) {", file=f)
+ print(" return sigmap(cell->connections_.at(portname, defval));", file=f)
+ print(" }", file=f)
+ print("", file=f)
print(" Const param(Cell *cell, IdString paramname) {", file=f)
print(" return cell->getParam(paramname);", file=f)
print(" }", file=f)
print("", file=f)
+ print(" Const param(Cell *cell, IdString paramname, const Const& defval) {", file=f)
+ print(" return cell->parameters.at(paramname, defval);", file=f)
+ print(" }", file=f)
+ print("", file=f)
print(" int nusers(const SigSpec &sig) {", file=f)
print(" pool<Cell*> users;", file=f)
@@ -468,7 +478,17 @@ with open(outfile, "w") as f:
print("", file=f)
print(" {}_pm(Module *module, const vector<Cell*> &cells) :".format(prefix), file=f)
- print(" module(module), sigmap(module), generate_mode(false), rngseed(12345678) {", file=f)
+ print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f)
+ print(" setup(cells);", file=f)
+ print(" }", file=f)
+ print("", file=f)
+
+ print(" {}_pm(Module *module) :".format(prefix), file=f)
+ print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f)
+ print(" }", file=f)
+ print("", file=f)
+
+ print(" void setup(const vector<Cell*> &cells) {", file=f)
for current_pattern in sorted(patterns.keys()):
for s, t in sorted(udata_types[current_pattern].items()):
if t.endswith("*"):
@@ -476,6 +496,8 @@ with open(outfile, "w") as f:
else:
print(" ud_{}.{} = {}();".format(current_pattern, s, t), file=f)
current_pattern = None
+ print(" log_assert(!setup_done);", file=f)
+ print(" setup_done = true;", file=f)
print(" for (auto port : module->ports)", file=f)
print(" add_siguser(module->wire(port), nullptr);", file=f)
print(" for (auto cell : module->cells())", file=f)
@@ -530,6 +552,7 @@ with open(outfile, "w") as f:
for current_pattern in sorted(patterns.keys()):
print(" int run_{}(std::function<void()> on_accept_f) {{".format(current_pattern), file=f)
+ print(" log_assert(setup_done);", file=f)
print(" accept_cnt = 0;", file=f)
print(" on_accept = on_accept_f;", file=f)
print(" rollback = 0;", file=f)
diff --git a/passes/pmgen/test_pmgen.cc b/passes/pmgen/test_pmgen.cc
index 4f3eec935..72dc18dcc 100644
--- a/passes/pmgen/test_pmgen.cc
+++ b/passes/pmgen/test_pmgen.cc
@@ -23,13 +23,11 @@
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
-// for peepopt_pm
-bool did_something;
-
#include "passes/pmgen/test_pmgen_pm.h"
#include "passes/pmgen/ice40_dsp_pm.h"
#include "passes/pmgen/xilinx_srl_pm.h"
-#include "passes/pmgen/peepopt_pm.h"
+
+#include "generate.h"
void reduce_chain(test_pmgen_pm &pm)
{
@@ -118,123 +116,6 @@ void opt_eqpmux(test_pmgen_pm &pm)
log(" -> %s (%s)\n", log_id(c), log_id(c->type));
}
-#define GENERATE_PATTERN(pmclass, pattern) \
- generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design)
-
-void pmtest_addports(Module *module)
-{
- pool<SigBit> driven_bits, used_bits;
- SigMap sigmap(module);
- int icnt = 0, ocnt = 0;
-
- for (auto cell : module->cells())
- for (auto conn : cell->connections())
- {
- if (cell->input(conn.first))
- for (auto bit : sigmap(conn.second))
- used_bits.insert(bit);
- if (cell->output(conn.first))
- for (auto bit : sigmap(conn.second))
- driven_bits.insert(bit);
- }
-
- for (auto wire : vector<Wire*>(module->wires()))
- {
- SigSpec ibits, obits;
- for (auto bit : sigmap(wire)) {
- if (!used_bits.count(bit))
- obits.append(bit);
- if (!driven_bits.count(bit))
- ibits.append(bit);
- }
- if (!ibits.empty()) {
- Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits));
- w->port_input = true;
- module->connect(ibits, w);
- }
- if (!obits.empty()) {
- Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits));
- w->port_output = true;
- module->connect(w, obits);
- }
- }
-
- module->fixup_ports();
-}
-
-template <class pm>
-void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design)
-{
- log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass);
-
- int modcnt = 0;
- int maxmodcnt = 100;
- int maxsubcnt = 4;
- int timeout = 0;
- vector<Module*> mods;
-
- while (modcnt < maxmodcnt)
- {
- int submodcnt = 0, itercnt = 0, cellcnt = 0;
- Module *mod = design->addModule(NEW_ID);
-
- while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000)
- {
- if (timeout++ > 10000)
- log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n");
-
- pm matcher(mod, mod->cells());
-
- matcher.rng(1);
- matcher.rngseed += modcnt;
- matcher.rng(1);
- matcher.rngseed += submodcnt;
- matcher.rng(1);
- matcher.rngseed += itercnt;
- matcher.rng(1);
- matcher.rngseed += cellcnt;
- matcher.rng(1);
-
- if (GetSize(mod->cells()) != cellcnt)
- {
- bool found_match = false;
- run(matcher, [&](){ found_match = true; });
- cellcnt = GetSize(mod->cells());
-
- if (found_match) {
- Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d",
- pmclass, pattern, modcnt++));
- log("Creating module %s with %d cells.\n", log_id(m), cellcnt);
- mod->cloneInto(m);
- pmtest_addports(m);
- mods.push_back(m);
- submodcnt++;
- timeout = 0;
- }
- }
-
- matcher.generate_mode = true;
- run(matcher, [](){});
- }
-
- if (submodcnt && maxsubcnt < (1 << 16))
- maxsubcnt *= 2;
-
- design->remove(mod);
- }
-
- Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern));
- log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods));
- for (auto mod : mods) {
- Cell *c = m->addCell(mod->name, mod->name);
- for (auto port : mod->ports) {
- Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port)));
- c->setPort(port, w);
- }
- }
- pmtest_addports(m);
-}
-
struct TestPmgenPass : public Pass {
TestPmgenPass() : Pass("test_pmgen", "test pass for pmgen") { }
void help() YS_OVERRIDE
@@ -355,12 +236,6 @@ struct TestPmgenPass : public Pass {
if (pattern == "xilinx_srl.variable")
return GENERATE_PATTERN(xilinx_srl_pm, variable);
- if (pattern == "peepopt-muldiv")
- return GENERATE_PATTERN(peepopt_pm, muldiv);
-
- if (pattern == "peepopt-shiftmul")
- return GENERATE_PATTERN(peepopt_pm, shiftmul);
-
log_cmd_error("Unknown pattern: %s\n", pattern.c_str());
}
diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc
new file mode 100644
index 000000000..054e123e4
--- /dev/null
+++ b/passes/pmgen/xilinx_dsp.cc
@@ -0,0 +1,646 @@
+/*
+ * yosys -- Yosys Open SYnthesis Suite
+ *
+ * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at>
+ * 2019 Eddie Hung <eddie@fpgeh.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+#include <deque>
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#include "passes/pmgen/xilinx_dsp_pm.h"
+#include "passes/pmgen/xilinx_dsp_CREG_pm.h"
+#include "passes/pmgen/xilinx_dsp_cascade_pm.h"
+
+// Create a new DSP48E1 cell in `module` with every pipeline-register
+// parameter set to 0, direct A/B inputs, multiplier, SIMD and D-port
+// features at "NONE"/"ONE48"/"FALSE", and the listed input ports tied to
+// zero. Callers override whatever they need afterwards.
+static Cell* addDsp(Module *module) {
+	Cell *dsp = module->addCell(NEW_ID, ID(DSP48E1));
+
+	// Parameter defaults, applied strictly in the listed order
+	const std::pair<IdString, Const> param_defaults[] = {
+		{ID(ACASCREG), Const(0)},
+		{ID(ADREG), Const(0)},
+		{ID(A_INPUT), Const("DIRECT")},
+		{ID(ALUMODEREG), Const(0)},
+		{ID(AREG), Const(0)},
+		{ID(BCASCREG), Const(0)},
+		{ID(B_INPUT), Const("DIRECT")},
+		{ID(BREG), Const(0)},
+		{ID(CARRYINREG), Const(0)},
+		{ID(CARRYINSELREG), Const(0)},
+		{ID(CREG), Const(0)},
+		{ID(DREG), Const(0)},
+		{ID(INMODEREG), Const(0)},
+		{ID(MREG), Const(0)},
+		{ID(OPMODEREG), Const(0)},
+		{ID(PREG), Const(0)},
+		{ID(USE_MULT), Const("NONE")},
+		{ID(USE_SIMD), Const("ONE48")},
+		{ID(USE_DPORT), Const("FALSE")},
+	};
+	for (const auto &entry : param_defaults)
+		dsp->setParam(entry.first, entry.second);
+
+	// Input ports tied to all-zero constants: {port, width in bits}
+	const std::pair<IdString, int> zeroed_ports[] = {
+		{ID(D), 25},
+		{ID(INMODE), 5},
+		{ID(ALUMODE), 4},
+		{ID(OPMODE), 7},
+		{ID(CARRYINSEL), 3},
+		{ID(ACIN), 30},
+		{ID(BCIN), 18},
+		{ID(PCIN), 48},
+		{ID(CARRYIN), 1},
+	};
+	for (const auto &entry : zeroed_ports)
+		dsp->setPort(entry.first, Const(0, entry.second));
+
+	return dsp;
+}
+
+// Pack small $add/$sub cells into DSP48E1 cells running in SIMD mode.
+//
+// Only cells from `selected_cells` are considered whose Y port is a single
+// chunk on a wire carrying "simd" in its use_dsp attribute. Cells with
+// Y <= 13 bits and A/B <= 12 bits go into FOUR12 lanes (two to four cells
+// per DSP); cells with Y <= 25 bits and A/B <= 24 bits go into TWO24 lanes
+// (exactly two cells per DSP). Adders and subtractors are packed
+// separately because ALUMODE applies to the whole DSP. A cell left over
+// without a partner is not packed. Packed cells are removed from `module`
+// and each new DSP is added to the design's selection.
+void xilinx_simd_pack(Module *module, const std::vector<Cell*> &selected_cells)
+{
+	std::deque<Cell*> simd12_add, simd12_sub;
+	std::deque<Cell*> simd24_add, simd24_sub;
+
+	// Sort the candidate cells into the four work queues
+	for (auto cell : selected_cells) {
+		if (!cell->type.in(ID($add), ID($sub)))
+			continue;
+		SigSpec Y = cell->getPort(ID(Y));
+		if (!Y.is_chunk())
+			continue;
+		if (!Y.as_chunk().wire->get_strpool_attribute(ID(use_dsp)).count("simd"))
+			continue;
+		if (GetSize(Y) > 25)
+			continue;
+		SigSpec A = cell->getPort(ID(A));
+		SigSpec B = cell->getPort(ID(B));
+		if (GetSize(Y) <= 13) {
+			if (GetSize(A) > 12)
+				continue;
+			if (GetSize(B) > 12)
+				continue;
+			if (cell->type == ID($add))
+				simd12_add.push_back(cell);
+			else if (cell->type == ID($sub))
+				simd12_sub.push_back(cell);
+		}
+		else if (GetSize(Y) <= 25) {
+			if (GetSize(A) > 24)
+				continue;
+			if (GetSize(B) > 24)
+				continue;
+			if (cell->type == ID($add))
+				simd24_add.push_back(cell);
+			else if (cell->type == ID($sub))
+				simd24_sub.push_back(cell);
+		}
+		else
+			log_abort();
+	}
+
+	// Append one 12-bit lane (operands, result and carry-out) for `lane`
+	auto f12 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) {
+		SigSpec A = lane->getPort(ID(A));
+		SigSpec B = lane->getPort(ID(B));
+		SigSpec Y = lane->getPort(ID(Y));
+		A.extend_u0(12, lane->getParam(ID(A_SIGNED)).as_bool());
+		B.extend_u0(12, lane->getParam(ID(B_SIGNED)).as_bool());
+		// The DSP is configured below with Z = C and X = A:B, and a
+		// subtraction is performed as Z - X. The cell's A operand must
+		// therefore go to C and its B operand to A:B (same routing as the
+		// 24-bit lanes); the other way round a $sub would yield B - A.
+		C.append(A);
+		AB.append(B);
+		if (GetSize(Y) < 13)
+			Y.append(module->addWire(NEW_ID, 13-GetSize(Y)));
+		else
+			log_assert(GetSize(Y) == 13);
+		P.append(Y.extract(0, 12));
+		CARRYOUT.append(Y[12]);
+	};
+	// Drain a queue of 12-bit cells, up to four per DSP
+	auto g12 = [&f12,module](std::deque<Cell*> &simd12) {
+		while (simd12.size() > 1) {
+			SigSpec AB, C, P, CARRYOUT;
+
+			Cell *lane1 = simd12.front();
+			simd12.pop_front();
+			Cell *lane2 = simd12.front();
+			simd12.pop_front();
+			Cell *lane3 = nullptr;
+			Cell *lane4 = nullptr;
+
+			if (!simd12.empty()) {
+				lane3 = simd12.front();
+				simd12.pop_front();
+				if (!simd12.empty()) {
+					lane4 = simd12.front();
+					simd12.pop_front();
+				}
+			}
+
+			log("Analysing %s.%s for Xilinx DSP SIMD12 packing.\n", log_id(module), log_id(lane1));
+
+			Cell *cell = addDsp(module);
+			cell->setParam(ID(USE_SIMD), Const("FOUR12"));
+			// X = A:B
+			// Y = 0
+			// Z = C
+			cell->setPort(ID(OPMODE), Const::from_string("0110011"));
+
+			log_assert(lane1);
+			log_assert(lane2);
+			f12(AB, C, P, CARRYOUT, lane1);
+			f12(AB, C, P, CARRYOUT, lane2);
+			if (lane3) {
+				f12(AB, C, P, CARRYOUT, lane3);
+				if (lane4)
+					f12(AB, C, P, CARRYOUT, lane4);
+				else {
+					// Unused fourth lane: zero inputs, dangling outputs
+					AB.append(Const(0, 12));
+					C.append(Const(0, 12));
+					P.append(module->addWire(NEW_ID, 12));
+					CARRYOUT.append(module->addWire(NEW_ID, 1));
+				}
+			}
+			else {
+				// Unused third and fourth lanes
+				AB.append(Const(0, 24));
+				C.append(Const(0, 24));
+				P.append(module->addWire(NEW_ID, 24));
+				CARRYOUT.append(module->addWire(NEW_ID, 2));
+			}
+			log_assert(GetSize(AB) == 48);
+			log_assert(GetSize(C) == 48);
+			log_assert(GetSize(P) == 48);
+			log_assert(GetSize(CARRYOUT) == 4);
+			// The 48-bit A:B operand is split into B (low 18) and A (high 30)
+			cell->setPort(ID(A), AB.extract(18, 30));
+			cell->setPort(ID(B), AB.extract(0, 18));
+			cell->setPort(ID(C), C);
+			cell->setPort(ID(P), P);
+			cell->setPort(ID(CARRYOUT), CARRYOUT);
+			// The queue holds a single cell type, so lane1 decides add vs. sub
+			if (lane1->type == ID($sub))
+				cell->setPort(ID(ALUMODE), Const::from_string("0011"));
+
+			module->remove(lane1);
+			module->remove(lane2);
+			if (lane3) module->remove(lane3);
+			if (lane4) module->remove(lane4);
+
+			module->design->select(module, cell);
+		}
+	};
+	g12(simd12_add);
+	g12(simd12_sub);
+
+	// Append one 24-bit lane (operands, result and carry-out) for `lane`
+	auto f24 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) {
+		SigSpec A = lane->getPort(ID(A));
+		SigSpec B = lane->getPort(ID(B));
+		SigSpec Y = lane->getPort(ID(Y));
+		A.extend_u0(24, lane->getParam(ID(A_SIGNED)).as_bool());
+		B.extend_u0(24, lane->getParam(ID(B_SIGNED)).as_bool());
+		C.append(A);
+		AB.append(B);
+		if (GetSize(Y) < 25)
+			Y.append(module->addWire(NEW_ID, 25-GetSize(Y)));
+		else
+			log_assert(GetSize(Y) == 25);
+		P.append(Y.extract(0, 24));
+		CARRYOUT.append(module->addWire(NEW_ID)); // TWO24 uses every other bit
+		CARRYOUT.append(Y[24]);
+	};
+	// Drain a queue of 24-bit cells, two per DSP
+	auto g24 = [&f24,module](std::deque<Cell*> &simd24) {
+		while (simd24.size() > 1) {
+			SigSpec AB;
+			SigSpec C;
+			SigSpec P;
+			SigSpec CARRYOUT;
+
+			Cell *lane1 = simd24.front();
+			simd24.pop_front();
+			Cell *lane2 = simd24.front();
+			simd24.pop_front();
+
+			log("Analysing %s.%s for Xilinx DSP SIMD24 packing.\n", log_id(module), log_id(lane1));
+
+			Cell *cell = addDsp(module);
+			cell->setParam(ID(USE_SIMD), Const("TWO24"));
+			// X = A:B
+			// Y = 0
+			// Z = C
+			cell->setPort(ID(OPMODE), Const::from_string("0110011"));
+
+			log_assert(lane1);
+			log_assert(lane2);
+			f24(AB, C, P, CARRYOUT, lane1);
+			f24(AB, C, P, CARRYOUT, lane2);
+			log_assert(GetSize(AB) == 48);
+			log_assert(GetSize(C) == 48);
+			log_assert(GetSize(P) == 48);
+			log_assert(GetSize(CARRYOUT) == 4);
+			cell->setPort(ID(A), AB.extract(18, 30));
+			cell->setPort(ID(B), AB.extract(0, 18));
+			cell->setPort(ID(C), C);
+			cell->setPort(ID(P), P);
+			cell->setPort(ID(CARRYOUT), CARRYOUT);
+			if (lane1->type == ID($sub))
+				cell->setPort(ID(ALUMODE), Const::from_string("0011"));
+
+			module->remove(lane1);
+			module->remove(lane2);
+
+			module->design->select(module, cell);
+		}
+	};
+	g24(simd24_add);
+	g24(simd24_sub);
+}
+
+void xilinx_dsp_pack(xilinx_dsp_pm &pm)
+{
+ auto &st = pm.st_xilinx_dsp_pack;
+
+ log("Analysing %s.%s for Xilinx DSP packing.\n", log_id(pm.module), log_id(st.dsp));
+
+ log_debug("preAdd: %s\n", log_id(st.preAdd, "--"));
+ log_debug("ffAD: %s %s %s\n", log_id(st.ffAD, "--"), log_id(st.ffADcemux, "--"), log_id(st.ffADrstmux, "--"));
+ log_debug("ffA2: %s %s %s\n", log_id(st.ffA2, "--"), log_id(st.ffA2cemux, "--"), log_id(st.ffA2rstmux, "--"));
+ log_debug("ffA1: %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--"));
+ log_debug("ffB2: %s %s %s\n", log_id(st.ffB2, "--"), log_id(st.ffB2cemux, "--"), log_id(st.ffB2rstmux, "--"));
+ log_debug("ffB1: %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--"));
+ log_debug("ffD: %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--"));
+ log_debug("dsp: %s\n", log_id(st.dsp, "--"));
+ log_debug("ffM: %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--"));
+ log_debug("postAdd: %s\n", log_id(st.postAdd, "--"));
+ log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--"));
+ log_debug("ffP: %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--"));
+ log_debug("overflow: %s\n", log_id(st.overflow, "--"));
+
+ Cell *cell = st.dsp;
+
+ if (st.preAdd) {
+ log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type));
+ bool A_SIGNED = st.preAdd->getParam(ID(A_SIGNED)).as_bool();
+ bool D_SIGNED = st.preAdd->getParam(ID(B_SIGNED)).as_bool();
+ if (st.sigA == st.preAdd->getPort(ID(B)))
+ std::swap(A_SIGNED, D_SIGNED);
+ st.sigA.extend_u0(30, A_SIGNED);
+ st.sigD.extend_u0(25, D_SIGNED);
+ cell->setPort(ID(A), st.sigA);
+ cell->setPort(ID(D), st.sigD);
+ cell->setPort(ID(INMODE), Const::from_string("00100"));
+
+ if (st.ffAD) {
+ if (st.ffADcemux) {
+ SigSpec S = st.ffADcemux->getPort(ID(S));
+ cell->setPort(ID(CEAD), st.ffADcepol ? S : pm.module->Not(NEW_ID, S));
+ }
+ else
+ cell->setPort(ID(CEAD), State::S1);
+ cell->setParam(ID(ADREG), 1);
+ }
+
+ cell->setParam(ID(USE_DPORT), Const("TRUE"));
+
+ pm.autoremove(st.preAdd);
+ }
+ if (st.postAdd) {
+ log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type));
+
+ SigSpec &opmode = cell->connections_.at(ID(OPMODE));
+ if (st.postAddMux) {
+ log_assert(st.ffP);
+ opmode[4] = st.postAddMux->getPort(ID(S));
+ pm.autoremove(st.postAddMux);
+ }
+ else if (st.ffP && st.sigC == st.sigP)
+ opmode[4] = State::S0;
+ else
+ opmode[4] = State::S1;
+ opmode[6] = State::S0;
+ opmode[5] = State::S1;
+
+ if (opmode[4] != State::S0) {
+ if (st.postAddMuxAB == ID(A))
+ st.sigC.extend_u0(48, st.postAdd->getParam(ID(B_SIGNED)).as_bool());
+ else
+ st.sigC.extend_u0(48, st.postAdd->getParam(ID(A_SIGNED)).as_bool());
+ cell->setPort(ID(C), st.sigC);
+ }
+
+ pm.autoremove(st.postAdd);
+ }
+ if (st.overflow) {
+ log(" overflow %s (%s)\n", log_id(st.overflow), log_id(st.overflow->type));
+ cell->setParam(ID(USE_PATTERN_DETECT), Const("PATDET"));
+ cell->setParam(ID(SEL_PATTERN), Const("PATTERN"));
+ cell->setParam(ID(SEL_MASK), Const("MASK"));
+
+ if (st.overflow->type == ID($ge)) {
+ Const B = st.overflow->getPort(ID(B)).as_const();
+ log_assert(std::count(B.bits.begin(), B.bits.end(), State::S1) == 1);
+ // Since B is an exact power of 2, subtract 1
+ // by inverting all bits up until hitting
+ // that one hi bit
+ for (auto &b : B.bits)
+ if (b == State::S0) b = State::S1;
+ else if (b == State::S1) {
+ b = State::S0;
+ break;
+ }
+ B.extu(48);
+
+ cell->setParam(ID(MASK), B);
+ cell->setParam(ID(PATTERN), Const(0, 48));
+ cell->setPort(ID(OVERFLOW), st.overflow->getPort(ID(Y)));
+ }
+ else log_abort();
+
+ pm.autoremove(st.overflow);
+ }
+
+ if (st.clock != SigBit())
+ {
+ cell->setPort(ID(CLK), st.clock);
+
+ auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) {
+ SigSpec D = ff->getPort(ID(D));
+ SigSpec Q = pm.sigmap(ff->getPort(ID(Q)));
+ if (!A.empty())
+ A.replace(Q, D);
+ if (rstmux) {
+ SigSpec Y = rstmux->getPort(ID(Y));
+ SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B));
+ if (!A.empty())
+ A.replace(Y, AB);
+ if (rstport != IdString()) {
+ SigSpec S = rstmux->getPort(ID(S));
+ cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S));
+ }
+ }
+ else if (rstport != IdString())
+ cell->setPort(rstport, State::S0);
+ if (cemux) {
+ SigSpec Y = cemux->getPort(ID(Y));
+ SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A));
+ SigSpec S = cemux->getPort(ID(S));
+ if (!A.empty())
+ A.replace(Y, BA);
+ cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S));
+ }
+ else
+ cell->setPort(ceport, State::S1);
+
+ for (auto c : Q.chunks()) {
+ auto it = c.wire->attributes.find(ID(init));
+ if (it == c.wire->attributes.end())
+ continue;
+ for (int i = c.offset; i < c.offset+c.width; i++) {
+ log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
+ it->second[i] = State::Sx;
+ }
+ }
+ };
+
+ if (st.ffA2) {
+ SigSpec A = cell->getPort(ID(A));
+ f(A, st.ffA2, st.ffA2cemux, st.ffA2cepol, ID(CEA2), st.ffA2rstmux, st.ffArstpol, ID(RSTA));
+ if (st.ffA1) {
+ f(A, st.ffA1, st.ffA1cemux, st.ffA1cepol, ID(CEA1), st.ffA1rstmux, st.ffArstpol, IdString());
+ cell->setParam(ID(AREG), 2);
+ cell->setParam(ID(ACASCREG), 2);
+ }
+ else {
+ cell->setParam(ID(AREG), 1);
+ cell->setParam(ID(ACASCREG), 1);
+ }
+ pm.add_siguser(A, cell);
+ cell->setPort(ID(A), A);
+ }
+ if (st.ffB2) {
+ SigSpec B = cell->getPort(ID(B));
+ f(B, st.ffB2, st.ffB2cemux, st.ffB2cepol, ID(CEB2), st.ffB2rstmux, st.ffBrstpol, ID(RSTB));
+ if (st.ffB1) {
+ f(B, st.ffB1, st.ffB1cemux, st.ffB1cepol, ID(CEB1), st.ffB1rstmux, st.ffBrstpol, IdString());
+ cell->setParam(ID(BREG), 2);
+ cell->setParam(ID(BCASCREG), 2);
+ }
+ else {
+ cell->setParam(ID(BREG), 1);
+ cell->setParam(ID(BCASCREG), 1);
+ }
+ pm.add_siguser(B, cell);
+ cell->setPort(ID(B), B);
+ }
+ if (st.ffD) {
+ SigSpec D = cell->getPort(ID(D));
+ f(D, st.ffD, st.ffDcemux, st.ffDcepol, ID(CED), st.ffDrstmux, st.ffDrstpol, ID(RSTD));
+ pm.add_siguser(D, cell);
+ cell->setPort(ID(D), D);
+ cell->setParam(ID(DREG), 1);
+ }
+ if (st.ffM) {
+ SigSpec M; // unused
+ f(M, st.ffM, st.ffMcemux, st.ffMcepol, ID(CEM), st.ffMrstmux, st.ffMrstpol, ID(RSTM));
+ st.ffM->connections_.at(ID(Q)).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM)));
+ cell->setParam(ID(MREG), State::S1);
+ }
+ if (st.ffP) {
+ SigSpec P; // unused
+ f(P, st.ffP, st.ffPcemux, st.ffPcepol, ID(CEP), st.ffPrstmux, st.ffPrstpol, ID(RSTP));
+ st.ffP->connections_.at(ID(Q)).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP)));
+ cell->setParam(ID(PREG), State::S1);
+ }
+
+ log(" clock: %s (%s)", log_signal(st.clock), "posedge");
+
+ if (st.ffA2) {
+ log(" ffA2:%s", log_id(st.ffA2));
+ if (st.ffA1)
+ log(" ffA1:%s", log_id(st.ffA1));
+ }
+
+ if (st.ffAD)
+ log(" ffAD:%s", log_id(st.ffAD));
+
+ if (st.ffB2) {
+ log(" ffB2:%s", log_id(st.ffB2));
+ if (st.ffB1)
+ log(" ffB1:%s", log_id(st.ffB1));
+ }
+
+ if (st.ffD)
+ log(" ffD:%s", log_id(st.ffD));
+
+ if (st.ffM)
+ log(" ffM:%s", log_id(st.ffM));
+
+ if (st.ffP)
+ log(" ffP:%s", log_id(st.ffP));
+ }
+ log("\n");
+
+ SigSpec P = st.sigP;
+ if (GetSize(P) < 48)
+ P.append(pm.module->addWire(NEW_ID, 48-GetSize(P)));
+ cell->setPort(ID(P), P);
+
+ pm.blacklist(cell);
+}
+
+// Callback for the CREG pattern matcher: when a register (plus its optional
+// clock-enable and reset muxes) was matched on the 'C' input, rewire the DSP
+// cell to use its built-in C register instead. The cell is blacklisted at the
+// end regardless of whether anything was packed.
+void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm)
+{
+	auto &st = pm.st_xilinx_dsp_packC;
+	Cell *cell = st.dsp;
+
+	log_debug("Analysing %s.%s for Xilinx DSP packing (CREG).\n", log_id(pm.module), log_id(st.dsp));
+	log_debug("ffC: %s %s %s\n", log_id(st.ffC, "--"), log_id(st.ffCcemux, "--"), log_id(st.ffCrstmux, "--"));
+
+	// Without a clock nothing was matched; just retire the cell
+	if (st.clock == SigBit()) {
+		pm.blacklist(cell);
+		return;
+	}
+
+	cell->setPort(ID(CLK), st.clock);
+
+	// Absorb one flip-flop into the DSP:
+	//   sig      - DSP input signal to be rewritten past the register (may be empty)
+	//   ff       - the register being absorbed
+	//   cemux    - optional clock-enable mux, cepol its polarity, ceport the DSP enable port
+	//   rstmux   - optional reset mux, rstpol its polarity, rstport the DSP reset port
+	//              (an empty rstport means the reset port is left untouched)
+	auto absorb_ff = [&pm, cell](SigSpec &sig, Cell *ff, Cell *cemux, bool cepol, IdString ceport,
+			Cell *rstmux, bool rstpol, IdString rstport) {
+		// SigSpec::replace() preserves width, so emptiness cannot change below
+		const bool rewrite_sig = !sig.empty();
+		const bool has_rstport = rstport != IdString();
+
+		SigSpec ffD = ff->getPort(ID(D));
+		SigSpec ffQ = pm.sigmap(ff->getPort(ID(Q)));
+		if (rewrite_sig)
+			sig.replace(ffQ, ffD);
+
+		// Reset: take the select of the reset mux (inverted when needed),
+		// or tie the reset port low when there is no reset mux
+		if (!rstmux) {
+			if (has_rstport)
+				cell->setPort(rstport, State::S0);
+		}
+		else {
+			SigSpec muxY = rstmux->getPort(ID(Y));
+			SigSpec muxData = rstmux->getPort(rstpol ? ID(A) : ID(B));
+			if (rewrite_sig)
+				sig.replace(muxY, muxData);
+			if (has_rstport) {
+				SigSpec sel = rstmux->getPort(ID(S));
+				if (rstpol)
+					cell->setPort(rstport, sel);
+				else
+					cell->setPort(rstport, pm.module->Not(NEW_ID, sel));
+			}
+		}
+
+		// Clock enable: take the select of the enable mux (inverted when
+		// needed), or tie the enable port high when there is no enable mux
+		if (!cemux) {
+			cell->setPort(ceport, State::S1);
+		}
+		else {
+			SigSpec muxY = cemux->getPort(ID(Y));
+			SigSpec muxData = cemux->getPort(cepol ? ID(B) : ID(A));
+			SigSpec sel = cemux->getPort(ID(S));
+			if (rewrite_sig)
+				sig.replace(muxY, muxData);
+			if (cepol)
+				cell->setPort(ceport, sel);
+			else
+				cell->setPort(ceport, pm.module->Not(NEW_ID, sel));
+		}
+
+		// Clear the init attribute bits of the absorbed register's output;
+		// only zero or undefined init values are expected here
+		for (const auto &chunk : ffQ.chunks()) {
+			auto it = chunk.wire->attributes.find(ID(init));
+			if (it == chunk.wire->attributes.end())
+				continue;
+			const int stop = chunk.offset + chunk.width;
+			for (int i = chunk.offset; i < stop; i++) {
+				log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
+				it->second[i] = State::Sx;
+			}
+		}
+	};
+
+	if (st.ffC) {
+		SigSpec sigC = cell->getPort(ID(C));
+		absorb_ff(sigC, st.ffC, st.ffCcemux, st.ffCcepol, ID(CEC), st.ffCrstmux, st.ffCrstpol, ID(RSTC));
+		pm.add_siguser(sigC, cell);
+		cell->setPort(ID(C), sigC);
+		cell->setParam(ID(CREG), 1);
+	}
+
+	log(" clock: %s (%s)", log_signal(st.clock), "posedge");
+	if (st.ffC)
+		log(" ffC:%s", log_id(st.ffC));
+	log("\n");
+
+	pm.blacklist(cell);
+}
+
+// The `xilinx_dsp` pass. For every selected module, execute() runs the SIMD
+// packer and then three pattern matchers in a fixed order: main packing,
+// CREG packing, and cascade detection.
+struct XilinxDspPass : public Pass {
+ XilinxDspPass() : Pass("xilinx_dsp", "Xilinx: pack resources into DSPs") { }
+ // Print the user-facing help text for the pass.
+ void help() YS_OVERRIDE
+ {
+ // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+ log("\n");
+ log(" xilinx_dsp [options] [selection]\n");
+ log("\n");
+ log("Pack input registers (A2, A1, B2, B1, C, D, AD; with optional enable/reset),\n");
+ log("pipeline registers (M; with optional enable/reset), output registers (P; with\n");
+ log("optional enable/reset), pre-adder and/or post-adder into Xilinx DSP resources.\n");
+ log("\n");
+ log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n");
+ log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n");
+ log("used to override the current accumulation result with a new value, which will\n");
+ log("be added to the multiplier result to form the next accumulation result.\n");
+ log("\n");
+ log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n");
+ log("connections (optionally, where 'P' is right-shifted by 17-bits and used as an\n");
+ log("input to the post-adder -- a pattern common for summing partial products to\n");
+ log("implement wide multipliers). Limited support also exists for similar cascading\n");
+ log("for A and B using '[AB]COUT' -> '[AB]CIN'. Currently, cascade chains are limited\n");
+ log("to a maximum length of 20 cells, corresponding to the smallest Xilinx 7 Series\n");
+ log("device.\n");
+ log("\n");
+ log("\n");
+ log("Experimental feature: addition/subtractions less than 12 or 24 bits with the\n");
+ log("'(* use_dsp=\"simd\" *)' attribute attached to the output wire or attached to\n");
+ log("the add/subtract operator will cause those operations to be implemented using\n");
+ log("the 'SIMD' feature of DSPs.\n");
+ log("\n");
+ log("Experimental feature: the presence of a `$ge' cell attached to the registered\n");
+ log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n");
+ log("into using the DSP48E1's pattern detector feature for overflow detection.\n");
+ log("\n");
+ }
+ // Run the pass on all selected modules of 'design'. 'args' is the command
+ // line; no options are currently recognised, so everything after the
+ // command name is handed to extra_args().
+ void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+ {
+ log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n");
+
+ // Option-parsing skeleton: the loop breaks on the first argument, so
+ // argidx is always 1 when it reaches extra_args()
+ size_t argidx;
+ for (argidx = 1; argidx < args.size(); argidx++)
+ {
+ // if (args[argidx] == "-singleton") {
+ // singleton_mode = true;
+ // continue;
+ // }
+ break;
+ }
+ extra_args(args, argidx, design);
+
+ for (auto module : design->selected_modules()) {
+ // Experimental feature: pack $add/$sub cells with
+ // (* use_dsp48="simd" *) into DSP48E1's using its
+ // SIMD feature
+ // NOTE(review): the help text above names this attribute
+ // 'use_dsp', not 'use_dsp48' -- confirm against xilinx_simd_pack
+ xilinx_simd_pack(module, module->selected_cells());
+
+ // Match for all features ([ABDMP][12]?REG, pre-adder,
+ // post-adder, pattern detector, etc.) except for CREG
+ {
+ xilinx_dsp_pm pm(module, module->selected_cells());
+ pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
+ }
+ // Separating out CREG packing is necessary since there
+ // is no guarantee that the cell ordering corresponds
+ // to the "expected" case (i.e. the order in which
+ // they appear in the source) thus the possibility
+ // existed that a register got packed as a CREG into a
+ // downstream DSP that should have otherwise been a
+ // PREG of an upstream DSP that had not been visited
+ // yet
+ {
+ xilinx_dsp_CREG_pm pm(module, module->selected_cells());
+ pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
+ }
+ // Lastly, identify and utilise PCOUT -> PCIN,
+ // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade
+ // chains
+ {
+ xilinx_dsp_cascade_pm pm(module, module->selected_cells());
+ pm.run_xilinx_dsp_cascade();
+ }
+ }
+ }
+} XilinxDspPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg
new file mode 100644
index 000000000..604aa222b
--- /dev/null
+++ b/passes/pmgen/xilinx_dsp.pmg
@@ -0,0 +1,725 @@
+// This file describes the main pattern matcher setup (of three total) that
+// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
+// At a high level, it works as follows:
+// ( 1) Starting from a DSP48E1 cell
+// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
+// (attached to at most two $mux cells that implement clock-enable or
+// reset functionality, using a subpattern discussed below)
+// If ADREG matched, treat 'A' input as input of ADREG
+// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
+// (pre-adder)
+// ( 4) If pre-adder was present, find match 'A' input for A2REG
+// If pre-adder was not present, move ADREG to A2REG
+// If A2REG, then match 'A' input for A1REG
+// ( 5) Match 'B' input for B2REG
+// If B2REG, then match 'B' input for B1REG
+// ( 6) Match 'D' input for DREG
+// ( 7) Match 'P' output that exclusively drives an MREG
+// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
+// cell (post-adder).
+// The other input to the adder is assumed to come in from the 'C' input
+// (note: 'P' -> 'C' connections that exist for accumulators are
+// recognised in xilinx_dsp.cc).
+// ( 9) Match 'P' output that exclusively drives a PREG
+// (10) If post-adder and PREG both present, match for a $mux cell driving
+// the 'C' input, where one of the $mux's inputs is the PREG output.
+// This indicates an accumulator situation, and one where a $mux exists
+// to override the accumulated value:
+// +--------------------------------+
+// | ____ |
+// +--| \ |
+// |$mux|-+ |
+// 'C' ---|____/ | |
+// | /-------\ +----+ |
+// +----+ +-| post- |___|PREG|---+ 'P'
+// |MREG|------ | adder | +----+
+// +----+ \-------/
+// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
+// to the 'P' output where it is compared to a constant that is a
+// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
+// In this scenario, the pattern detector functionality of a DSP48E1 can
+// be used to implement this function
+// Notes:
+// - The intention of this pattern matcher is for it to be compatible with
+// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
+// user instantiations that may already contain the cells being packed...
+// (though the latter is currently untested)
+// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used
+// for each *REG match, it has been factored out into two subpatterns:
+// in_dffe and out_dffe located at the bottom of this file.
+// - Matching for pattern detector features is currently incomplete. For
+// example, matching for underflow as well as overflow detection is
+// possible, as would auto-reset, enabling saturated arithmetic, detecting
+// custom patterns, etc.
+
+pattern xilinx_dsp_pack
+
+// Clock net shared by every register matched so far; it is SigBit() until
+// either the DSP's CLK port or a matched $dff supplies one
+state <SigBit> clock
+// Signal currently associated with each DSP port; these are updated as
+// registers and adders are matched
+state <SigSpec> sigA sigB sigC sigD sigM sigP
+// Which input (\A or \B) of the post-adder, and of the post-adder $mux,
+// carries sigP
+state <IdString> postAddAB postAddMuxAB
+// Clock-enable and reset mux polarities recorded for each matched register
+state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
+state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
+// Matched register cells together with their optional clock-enable and
+// reset $mux cells
+state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
+state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
+state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
+
+// Variables used for subpatterns
+// argQ/argD are the inputs handed to in_dffe/out_dffe; the dff* variables
+// carry the subpattern results back to the calling code block
+state <SigSpec> argQ argD
+state <bool> ffcepol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffcemux dffrstmux
+udata <bool> dffcepol dffrstpol
+
+// (1) Starting from a DSP48E1 cell
+match dsp
+ select dsp->type.in(\DSP48E1)
+endmatch
+
+// Seed the per-port signals (A, B, C, D, M) and the clock from the DSP
+// cell's current connections
+code sigA sigB sigC sigD sigM clock
+ // Trim the run of identical most-significant bits from 'sig'; when that
+ // repeated bit is driven by a wire rather than a constant, one copy of it
+ // is kept
+ auto unextend = [](const SigSpec &sig) {
+ int i;
+ for (i = GetSize(sig)-1; i > 0; i--)
+ if (sig[i] != sig[i-1])
+ break;
+ // Do not remove non-const sign bit
+ if (sig[i].wire)
+ ++i;
+ return sig.extract(0, i);
+ };
+ sigA = unextend(port(dsp, \A));
+ sigB = unextend(port(dsp, \B));
+
+ // 'C' and 'D' default to an empty signal when unconnected
+ sigC = port(dsp, \C, SigSpec());
+ sigD = port(dsp, \D, SigSpec());
+
+ SigSpec P = port(dsp, \P);
+ if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") {
+ // Only care about those bits that are used
+ int i;
+ for (i = 0; i < GetSize(P); i++) {
+ if (nusers(P[i]) <= 1)
+ break;
+ sigM.append(P[i]);
+ }
+ // All remaining (upper) bits of P must be unused as well
+ log_assert(nusers(P.extract_end(i)) <= 1);
+ // sigM could have no users if, for example, the downstream sink
+ // (e.g. $add) is narrower than the $mul result
+ if (sigM.empty())
+ reject;
+ }
+ else
+ sigM = P;
+
+ clock = port(dsp, \CLK, SigBit());
+endcode
+
+// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
+// (attached to at most two $mux cells that implement clock-enable or
+// reset functionality, using the in_dffe subpattern defined below)
+// If matched, treat 'A' input as input of ADREG
+code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
+ // Skip when the DSP already uses its AD register
+ if (param(dsp, \ADREG).as_int() == 0) {
+ argQ = sigA;
+ subpattern(in_dffe);
+ // in_dffe reports its result through the dff* variables
+ if (dff) {
+ ffAD = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffADrstmux = dffrstmux;
+ ffADrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffADcemux = dffcemux;
+ ffADcepol = dffcepol;
+ }
+ // Continue matching upstream of the register
+ sigA = dffD;
+ }
+ }
+endcode
+
+// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
+// (pre-adder)
+match preAdd
+ // Only considered while the 'D' input is unconnected or tied to zero
+ if sigD.empty() || sigD.is_fully_zero()
+ // Ensure that preAdder not already used
+ if param(dsp, \USE_DPORT, Const("FALSE")).decode_string() == "FALSE"
+ if port(dsp, \INMODE, Const(0, 5)).is_fully_zero()
+
+ select preAdd->type.in($add)
+ // Output has to be 25 bits or less
+ select GetSize(port(preAdd, \Y)) <= 25
+ // Adder output must have exactly two users
+ select nusers(port(preAdd, \Y)) == 2
+ choice <IdString> AB {\A, \B}
+ // A port has to be 30 bits or less
+ select GetSize(port(preAdd, AB)) <= 30
+ define <IdString> BA (AB == \A ? \B : \A)
+ // D port has to be 25 bits or less
+ select GetSize(port(preAdd, BA)) <= 25
+ // The adder output must be exactly the signal feeding 'A'
+ index <SigSpec> port(preAdd, \Y) === sigA
+
+ optional
+endmatch
+
+// If a pre-adder was found, continue matching from its two inputs
+// NOTE(review): the 'AB' choice made in the preAdd match is not recorded or
+// consulted here -- the adder's \A input always becomes sigA and its \B
+// input always becomes sigD; confirm the width limits hold for both choices
+code sigA sigD
+ if (preAdd) {
+ sigA = port(preAdd, \A);
+ sigD = port(preAdd, \B);
+ }
+endcode
+
+// (4) If pre-adder was present, find match 'A' input for A2REG
+// If pre-adder was not present, move ADREG to A2REG
+// Then match 'A' input for A1REG
+code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
+ // Only search for ffA2 if there was a pre-adder
+ // (otherwise ffA2 would have been matched as ffAD)
+ if (preAdd) {
+ if (param(dsp, \AREG).as_int() == 0) {
+ argQ = sigA;
+ subpattern(in_dffe);
+ if (dff) {
+ ffA2 = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffA2rstmux = dffrstmux;
+ ffArstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffA2cepol = dffcepol;
+ ffA2cemux = dffcemux;
+ }
+ sigA = dffD;
+ }
+ }
+ }
+ // And if there wasn't a pre-adder,
+ // move AD register to A
+ else if (ffAD) {
+ log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux);
+ // ffA2* are null here (asserted above), so the swaps also clear ffAD*
+ std::swap(ffA2, ffAD);
+ std::swap(ffA2cemux, ffADcemux);
+ std::swap(ffA2rstmux, ffADrstmux);
+ ffA2cepol = ffADcepol;
+ ffArstpol = ffADrstpol;
+ }
+
+ // Now attempt to match A1
+ if (ffA2) {
+ argQ = sigA;
+ subpattern(in_dffe);
+ if (dff) {
+ // A1 and A2 share a single reset polarity (ffArstpol), so A1 is
+ // only accepted when its reset matches A2's: either both have a
+ // reset mux or neither does, ...
+ if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr))
+ goto ffA1_end;
+ if (dffrstmux) {
+ // ... with the same polarity ...
+ if (ffArstpol != dffrstpol)
+ goto ffA1_end;
+ // ... and the same select signal
+ if (port(ffA2rstmux, \S) != port(dffrstmux, \S))
+ goto ffA1_end;
+ ffA1rstmux = dffrstmux;
+ }
+
+ ffA1 = dff;
+ clock = dffclock;
+
+ if (dffcemux) {
+ ffA1cemux = dffcemux;
+ ffA1cepol = dffcepol;
+ }
+ sigA = dffD;
+
+ffA1_end: ;
+ }
+ }
+endcode
+
+// (5) Match 'B' input for B2REG
+// If B2REG, then match 'B' input for B1REG
+code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
+ // Skip when the DSP already uses its B register(s)
+ if (param(dsp, \BREG).as_int() == 0) {
+ argQ = sigB;
+ subpattern(in_dffe);
+ if (dff) {
+ ffB2 = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffB2rstmux = dffrstmux;
+ ffBrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffB2cemux = dffcemux;
+ ffB2cepol = dffcepol;
+ }
+ sigB = dffD;
+
+ // Now attempt to match B1
+ // (ffB2 was just assigned from a non-null dff, so this always holds)
+ if (ffB2) {
+ argQ = sigB;
+ subpattern(in_dffe);
+ if (dff) {
+ // B1 and B2 share a single reset polarity (ffBrstpol), so B1
+ // is only accepted when its reset matches B2's: same presence
+ // of a reset mux, same polarity, same select signal
+ if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr))
+ goto ffB1_end;
+ if (dffrstmux) {
+ if (ffBrstpol != dffrstpol)
+ goto ffB1_end;
+ if (port(ffB2rstmux, \S) != port(dffrstmux, \S))
+ goto ffB1_end;
+ ffB1rstmux = dffrstmux;
+ }
+
+ ffB1 = dff;
+ clock = dffclock;
+
+ if (dffcemux) {
+ ffB1cemux = dffcemux;
+ ffB1cepol = dffcepol;
+ }
+ sigB = dffD;
+
+ffB1_end: ;
+ }
+ }
+
+ }
+ }
+endcode
+
+// (6) Match 'D' input for DREG
+code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
+ // Skip when the DSP already uses its D register
+ if (param(dsp, \DREG).as_int() == 0) {
+ argQ = sigD;
+ subpattern(in_dffe);
+ if (dff) {
+ ffD = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffDrstmux = dffrstmux;
+ ffDrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffDcemux = dffcemux;
+ ffDcepol = dffcepol;
+ }
+ // Continue matching upstream of the register
+ sigD = dffD;
+ }
+ }
+endcode
+
+// (7) Match 'P' output that exclusively drives an MREG
+code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
+ // Only when MREG is not already in use and sigM has exactly two users
+ if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
+ argD = sigM;
+ subpattern(out_dffe);
+ if (dff) {
+ ffM = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffMrstmux = dffrstmux;
+ ffMrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffMcemux = dffcemux;
+ ffMcepol = dffcepol;
+ }
+ // Continue matching downstream of the register
+ sigM = dffQ;
+ }
+ }
+ // The 'P' signal starts out as the (possibly registered) multiplier output
+ sigP = sigM;
+endcode
+
+// (8) Match 'P' output that exclusively drives one of two inputs to an $add
+// cell (post-adder).
+// The other input to the adder is assumed to come in from the 'C' input
+// (note: 'P' -> 'C' connections that exist for accumulators are
+// recognised in xilinx_dsp.cc).
+match postAdd
+ // Ensure that Z mux is not already used
+ // (an unconnected OPMODE defaults to a 7-bit zero constant so that
+ // extracting bits [6:4] stays within range)
+ if port(dsp, \OPMODE, Const(0, 7)).extract(4,3).is_fully_zero()
+
+ select postAdd->type.in($add)
+ select GetSize(port(postAdd, \Y)) <= 48
+ choice <IdString> AB {\A, \B}
+ // The matched adder input has two users, or three when MREG has a
+ // clock-enable mux
+ select nusers(port(postAdd, AB)) <= 3
+ filter ffMcemux || nusers(port(postAdd, AB)) == 2
+ filter !ffMcemux || nusers(port(postAdd, AB)) == 3
+
+ // The low bits of the adder input must be exactly sigP
+ index <SigBit> port(postAdd, AB)[0] === sigP[0]
+ filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
+ filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
+ // Check that remainder of AB is a sign-extension
+ define <bool> AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool())
+ filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+ // Remember which adder input carries sigP
+ set postAddAB AB
+ optional
+endmatch
+
+// If a post-adder was found, its other input becomes 'C' and its output
+// becomes the new 'P'
+code sigC sigP
+ if (postAdd) {
+ sigC = port(postAdd, postAddAB == \A ? \B : \A);
+ sigP = port(postAdd, \Y);
+ }
+endcode
+
+// (9) Match 'P' output that exclusively drives a PREG
+code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
+ // Skip when the DSP already uses its P register
+ if (param(dsp, \PREG).as_int() == 0) {
+ int users = 2;
+ // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux
+ if (ffMcemux && !postAdd) users++;
+ if (nusers(sigP) == users) {
+ argD = sigP;
+ subpattern(out_dffe);
+ if (dff) {
+ ffP = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffPrstmux = dffrstmux;
+ ffPrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffPcemux = dffcemux;
+ ffPcepol = dffcepol;
+ }
+ // 'P' is now the register output
+ sigP = dffQ;
+ }
+ }
+ }
+endcode
+
+// (10) If post-adder and PREG both present, match for a $mux cell driving
+// the 'C' input, where one of the $mux's inputs is the PREG output.
+// This indicates an accumulator situation, and one where a $mux exists
+// to override the accumulated value:
+// +--------------------------------+
+// | ____ |
+// +--| \ |
+// |$mux|-+ |
+// 'C' ---|____/ | |
+// | /-------\ +----+ |
+// +----+ +-| post- |___|PREG|---+ 'P'
+// |MREG|------ | adder | +----+
+// +----+ \-------/
+match postAddMux
+ if postAdd
+ if ffP
+ select postAddMux->type.in($mux)
+ // Mux output must have exactly two users
+ select nusers(port(postAddMux, \Y)) == 2
+ choice <IdString> AB {\A, \B}
+ // One mux input is the registered 'P'; the mux output feeds 'C'
+ index <SigSpec> port(postAddMux, AB) === sigP
+ index <SigSpec> port(postAddMux, \Y) === sigC
+ // Remember which mux input carries sigP
+ set postAddMuxAB AB
+ optional
+endmatch
+
+// With an override mux present, 'C' becomes the mux's other input
+code sigC
+ if (postAddMux)
+ sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
+endcode
+
+// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
+// the 'P' output where it is compared to a constant that is a power-of-2:
+// e.g. `assign overflow = (PREG >= 2**40);`
+// In this scenario, the pattern detector functionality of a DSP48E1 can
+// be used to implement this function
+match overflow
+ if ffP
+ // Only when the pattern detector is not already in use
+ if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
+ select overflow->type.in($ge)
+ select GetSize(port(overflow, \Y)) <= 48
+ select port(overflow, \B).is_fully_const()
+ define <Const> B port(overflow, \B).as_const()
+ // Exactly one bit set, i.e. the constant is a power of two
+ select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1
+ index <SigSpec> port(overflow, \A) === sigP
+ optional
+endmatch
+
+// All steps done: report the match to the callback
+code
+ accept;
+endcode
+
+// #######################
+
+// Subpattern for matching against input registers, based on knowledge of the
+// 'Q' input. Typically, identifying registers with clock-enable and reset
+// capability would be a task handled by other Yosys passes such as
+// dff2dffe, but since DSP inference happens much before this, these patterns
+// have to be manually identified.
+// At a high level:
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// one that exclusively drives the 'D' input of the $dff, with one of its
+// $mux inputs being fully zero
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+// Results are reported through dff, dffclock, dffD, dffrstmux/dffrstpol and
+// dffcemux/dffcepol; dff stays nullptr when nothing matched.
+subpattern in_dffe
+arg argD argQ clock
+
+code
+ dff = nullptr;
+ for (const auto &c : argQ.chunks()) {
+ // Abandon matches when 'Q' is a constant
+ if (!c.wire)
+ reject;
+ // Abandon matches when 'Q' has the keep attribute set
+ if (c.wire->get_bool_attribute(\keep))
+ reject;
+ // Abandon matches when 'Q' has a non-zero init attribute set
+ // (not supported by DSP48E1)
+ Const init = c.wire->attributes.at(\init, Const());
+ if (!init.empty())
+ for (auto b : init.extract(c.offset, c.width))
+ if (b != State::Sx && b != State::S0)
+ reject;
+ }
+endcode
+
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+match ff
+ select ff->type.in($dff)
+ // DSP48E1 does not support clock inversion
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ // Try every bit offset into the register at which argQ could start
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+ // Check that the rest of argQ is present
+ filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ // All packed registers must share one clock
+ filter clock == SigBit() || port(ff, \CLK) == clock
+
+ set ffoffset offset
+endmatch
+
+code argQ argD
+ SigSpec Q = port(ff, \Q);
+ dff = ff;
+ dffclock = port(ff, \CLK);
+ // dffD starts as the requested 'Q' bits and is rewritten to the
+ // corresponding 'D' bits of the register
+ dffD = argQ;
+ argD = port(ff, \D);
+ argQ = Q;
+ dffD.replace(argQ, argD);
+ // Only search for ffrstmux if dffD only
+ // has two (ff, ffrstmux) users
+ if (nusers(dffD) > 2)
+ argD = SigSpec();
+endcode
+
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// exclusively drives the 'D' input of the $dff, with one of the $mux
+// inputs being fully zero
+match ffrstmux
+ if !argD.empty()
+ select ffrstmux->type.in($mux)
+ index <SigSpec> port(ffrstmux, \Y) === argD
+
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ select port(ffrstmux, BA).is_fully_zero()
+
+ // pol is true when the zero (reset) value sits on the \B input
+ define <bool> pol (BA == \B)
+ set ffrstpol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffrstmux) {
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ // Step past the reset mux to its non-zero (data) input
+ argD = port(ffrstmux, ffrstpol ? \A : \B);
+ dffD.replace(port(ffrstmux, \Y), argD);
+
+ // Only search for ffcemux if argQ has at
+ // least 3 users (ff, <upstream>, ffrstmux) and
+ // dffD only has two (ff, ffrstmux)
+ if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+ argD = SigSpec();
+ }
+ else
+ dffrstmux = nullptr;
+endcode
+
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+match ffcemux
+ if !argD.empty()
+ select ffcemux->type.in($mux)
+ index <SigSpec> port(ffcemux, \Y) === argD
+ choice <IdString> AB {\A, \B}
+ index <SigSpec> port(ffcemux, AB) === argQ
+ // pol is true when the register's own 'Q' (hold value) sits on \A
+ define <bool> pol (AB == \A)
+ set ffcepol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffcemux) {
+ dffcemux = ffcemux;
+ dffcepol = ffcepol;
+ // Step past the enable mux to its new-value input
+ argD = port(ffcemux, ffcepol ? \B : \A);
+ dffD.replace(port(ffcemux, \Y), argD);
+ }
+ else
+ dffcemux = nullptr;
+endcode
+
+// #######################
+
+// Subpattern for matching against output registers, based on knowledge of the
+// 'D' input.
+// At a high level:
+// (1) Starting from an optional $mux cell that implements clock enable
+// semantics --- one where the given 'D' argument (partially or fully)
+// drives one of its two inputs
+// (2) Starting from, or continuing onto, another optional $mux cell that
+// implements synchronous reset semantics --- one where the given 'D'
+// argument (or the clock enable $mux output) drives one of its two inputs
+// and where the other input is fully zero
+// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
+// output of the previous clock enable or reset $mux cells)
+// Results are reported through dff, dffclock, dffQ, dffcemux/dffcepol and
+// dffrstmux/dffrstpol; dff stays nullptr when nothing matched.
+subpattern out_dffe
+arg argD argQ clock
+
+code
+ dff = nullptr;
+ // NOTE(review): unlike in_dffe, c.wire is dereferenced here without a
+ // null check -- presumably argD never contains constant chunks; confirm
+ for (auto c : argD.chunks())
+ // Abandon matches when 'D' has the keep attribute set
+ if (c.wire->get_bool_attribute(\keep))
+ reject;
+endcode
+
+// (1) Starting from an optional $mux cell that implements clock enable
+// semantics --- one where the given 'D' argument (partially or fully)
+// drives one of its two inputs
+match ffcemux
+ select ffcemux->type.in($mux)
+ // ffcemux output must have two users: ffcemux and ff.D
+ select nusers(port(ffcemux, \Y)) == 2
+
+ choice <IdString> AB {\A, \B}
+ // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s)
+ select nusers(port(ffcemux, AB)) >= 3
+
+ // Try every bit offset into the mux at which argD could start
+ slice offset GetSize(port(ffcemux, \Y))
+ define <IdString> BA (AB == \A ? \B : \A)
+ index <SigBit> port(ffcemux, BA)[offset] === argD[0]
+
+ // Check that the rest of argD is present
+ filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD)
+ filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD
+
+ set ffoffset offset
+ // pol is true when the keep-last-value net sits on \A
+ define <bool> pol (AB == \A)
+ set ffcepol pol
+
+ semioptional
+endmatch
+
+code argD argQ
+ dffcemux = ffcemux;
+ if (ffcemux) {
+ SigSpec BA = port(ffcemux, ffcepol ? \B : \A);
+ SigSpec Y = port(ffcemux, \Y);
+ // argD moves to the mux output; argQ becomes the matching bits of the
+ // keep-last-value input, which the $dff's 'Q' is checked against later
+ argQ = argD;
+ argD.replace(BA, Y);
+ argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B));
+
+ // NOTE(review): dffcemux was already assigned above; this repeats it
+ dffcemux = ffcemux;
+ dffcepol = ffcepol;
+ }
+endcode
+
+// (2) Starting from, or continuing onto, another optional $mux cell that
+// implements synchronous reset semantics --- one where the given 'D'
+// argument (or the clock enable $mux output) drives one of its two inputs
+// and where the other input is fully zero
+match ffrstmux
+ select ffrstmux->type.in($mux)
+ // ffrstmux output must have two users: ffrstmux and ff.D
+ select nusers(port(ffrstmux, \Y)) == 2
+
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ select port(ffrstmux, BA).is_fully_zero()
+
+ slice offset GetSize(port(ffrstmux, \Y))
+ define <IdString> AB (BA == \B ? \A : \B)
+ index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
+
+ // Check that offset is consistent
+ filter !ffcemux || ffoffset == offset
+ // Check that the rest of argD is present
+ filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD)
+ filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
+
+ set ffoffset offset
+ // pol is true when the data input is \A (i.e. the zero value sits on \B)
+ define <bool> pol (AB == \A)
+ set ffrstpol pol
+
+ semioptional
+endmatch
+
+code argD argQ
+ dffrstmux = ffrstmux;
+ if (ffrstmux) {
+ SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B);
+ SigSpec Y = port(ffrstmux, \Y);
+ // argD moves to the reset mux output
+ argD.replace(AB, Y);
+
+ // NOTE(review): dffrstmux was already assigned above; this repeats it
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ }
+endcode
+
+// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
+// output of the previous clock enable or reset $mux cells)
+match ff
+ select ff->type.in($dff)
+ // DSP48E1 does not support clock inversion
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \D)[offset] === argD[0]
+
+ // Check that offset is consistent
+ filter (!ffcemux && !ffrstmux) || ffoffset == offset
+ // Check that the rest of argD is present
+ filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
+ filter port(ff, \D).extract(offset, GetSize(argD)) == argD
+ // Check that FF.Q is connected to CE-mux
+ filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ // All packed registers must share one clock
+ filter clock == SigBit() || port(ff, \CLK) == clock
+
+ set ffoffset offset
+endmatch
+
+code argQ
+ SigSpec D = port(ff, \D);
+ SigSpec Q = port(ff, \Q);
+ // Without a clock-enable mux argQ has not been set yet: derive it by
+ // mapping the matched 'D' bits onto the register's 'Q' bits
+ if (!ffcemux) {
+ argQ = argD;
+ argQ.replace(D, Q);
+ }
+
+ // Abandon matches when 'Q' has a non-zero init attribute set
+ // (not supported by DSP48E1)
+ for (auto c : argQ.chunks()) {
+ Const init = c.wire->attributes.at(\init, Const());
+ if (!init.empty())
+ for (auto b : init.extract(c.offset, c.width))
+ if (b != State::Sx && b != State::S0)
+ reject;
+ }
+
+ dff = ff;
+ dffQ = argQ;
+ dffclock = port(ff, \CLK);
+endcode
diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg
new file mode 100644
index 000000000..a57043009
--- /dev/null
+++ b/passes/pmgen/xilinx_dsp_CREG.pmg
@@ -0,0 +1,234 @@
+// This file describes the second of three pattern matcher setups that
+// form the `xilinx_dsp` pass described in xilinx_dsp.cc
+// At a high level, it works as follows:
+// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
+// and (b) uses the 'C' port
+// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
+// (attached to at most two $mux cells that implement clock-enable or
+// reset functionality, using a subpattern discussed below)
+// Notes:
+// - Running CREG packing after xilinx_dsp_pack is necessary since there is no
+// guarantee that the cell ordering corresponds to the "expected" case (i.e.
+// the order in which they appear in the source) thus the possibility existed
+// that a register got packed as a CREG into a downstream DSP that should
+// have otherwise been a PREG of an upstream DSP that had not been visited
+// yet
+// - The reason this is separated out from the xilinx_dsp.pmg file is
+// for efficiency --- each *.pmg file creates a class of the same basename,
+// which when constructed, creates a custom database tailored to the
+// pattern(s) contained within. Since the pattern in this file must be
+// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
+// to reconstruct this database. Separating the two patterns into
+// independent files causes two smaller, more specific, databases.
+
+pattern xilinx_dsp_packC
+
+udata <std::function<SigSpec(const SigSpec&)>> unextend
+state <SigBit> clock
+state <SigSpec> sigC sigP
+state <bool> ffCcepol ffCrstpol
+state <Cell*> ffC ffCcemux ffCrstmux
+
+// Variables used for subpatterns
+state <SigSpec> argQ argD
+state <bool> ffcepol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffcemux dffrstmux
+udata <bool> dffcepol dffrstpol
+
+// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
+// and (b) uses the 'C' port
+match dsp
+ select dsp->type.in(\DSP48E1)
+ select param(dsp, \CREG, 1).as_int() == 0
+ select nusers(port(dsp, \C, SigSpec())) > 1
+endmatch
+
+code sigC sigP clock
+ unextend = [](const SigSpec &sig) {
+ int i;
+ for (i = GetSize(sig)-1; i > 0; i--)
+ if (sig[i] != sig[i-1])
+ break;
+ // Do not remove non-const sign bit
+ if (sig[i].wire)
+ ++i;
+ return sig.extract(0, i);
+ };
+ sigC = unextend(port(dsp, \C, SigSpec()));
+
+ SigSpec P = port(dsp, \P);
+ if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") {
+ // Only care about those bits that are used
+ int i;
+ for (i = 0; i < GetSize(P); i++) {
+ if (nusers(P[i]) <= 1)
+ break;
+ sigP.append(P[i]);
+ }
+ log_assert(nusers(P.extract_end(i)) <= 1);
+ }
+ else
+ sigP = P;
+
+ clock = port(dsp, \CLK, SigBit());
+endcode
+
+// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
+// (attached to at most two $mux cells that implement clock-enable or
+// reset functionality, using the in_dffe subpattern)
+code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
+ argQ = sigC;
+ subpattern(in_dffe);
+ if (dff) {
+ ffC = dff;
+ clock = dffclock;
+ if (dffrstmux) {
+ ffCrstmux = dffrstmux;
+ ffCrstpol = dffrstpol;
+ }
+ if (dffcemux) {
+ ffCcemux = dffcemux;
+ ffCcepol = dffcepol;
+ }
+ sigC = dffD;
+ }
+endcode
+
+code
+ if (ffC)
+ accept;
+endcode
+
+// #######################
+
+// Subpattern for matching against input registers, based on knowledge of the
+// 'Q' input. Typically, identifying registers with clock-enable and reset
+// capability would be a task handled by other Yosys passes such as
+// dff2dffe, but since DSP inference happens much before this, these patterns
+// have to be manually identified.
+// At a high level:
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// one that exclusively drives the 'D' input of the $dff, with one of its
+// $mux inputs being fully zero
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+subpattern in_dffe
+arg argD argQ clock
+
+code
+ dff = nullptr;
+ for (const auto &c : argQ.chunks()) {
+ // Abandon matches when 'Q' is a constant
+ if (!c.wire)
+ reject;
+ // Abandon matches when 'Q' has the keep attribute set
+ if (c.wire->get_bool_attribute(\keep))
+ reject;
+ // Abandon matches when 'Q' has a non-zero init attribute set
+ // (not supported by DSP48E1)
+ Const init = c.wire->attributes.at(\init, Const());
+ for (auto b : init.extract(c.offset, c.width))
+ if (b != State::Sx && b != State::S0)
+ reject;
+ }
+endcode
+
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+match ff
+ select ff->type.in($dff)
+ // DSP48E1 does not support clock inversion
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+ // Check that the rest of argQ is present
+ filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ filter clock == SigBit() || port(ff, \CLK) == clock
+
+ set ffoffset offset
+endmatch
+
+code argQ argD
+ SigSpec Q = port(ff, \Q);
+ dff = ff;
+ dffclock = port(ff, \CLK);
+ dffD = argQ;
+ argD = port(ff, \D);
+ argQ = Q;
+ dffD.replace(argQ, argD);
+ // Only search for ffrstmux if dffD only
+ // has two (ff, ffrstmux) users
+ if (nusers(dffD) > 2)
+ argD = SigSpec();
+endcode
+
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// exclusively drives the 'D' input of the $dff, with one of the $mux
+// inputs being fully zero
+match ffrstmux
+ if !argD.empty()
+ select ffrstmux->type.in($mux)
+ index <SigSpec> port(ffrstmux, \Y) === argD
+
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ select port(ffrstmux, BA).is_fully_zero()
+
+ define <bool> pol (BA == \B)
+ set ffrstpol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffrstmux) {
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ argD = port(ffrstmux, ffrstpol ? \A : \B);
+ dffD.replace(port(ffrstmux, \Y), argD);
+
+ // Only search for ffcemux if argQ has at
+ // least 3 users (ff, <upstream>, ffrstmux) and
+ // dffD only has two (ff, ffrstmux)
+ if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+ argD = SigSpec();
+ }
+ else
+ dffrstmux = nullptr;
+endcode
+
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+match ffcemux
+ if !argD.empty()
+ select ffcemux->type.in($mux)
+ index <SigSpec> port(ffcemux, \Y) === argD
+ choice <IdString> AB {\A, \B}
+ index <SigSpec> port(ffcemux, AB) === argQ
+ define <bool> pol (AB == \A)
+ set ffcepol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffcemux) {
+ dffcemux = ffcemux;
+ dffcepol = ffcepol;
+ argD = port(ffcemux, ffcepol ? \B : \A);
+ dffD.replace(port(ffcemux, \Y), argD);
+ }
+ else
+ dffcemux = nullptr;
+endcode
diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg
new file mode 100644
index 000000000..7a32df2b7
--- /dev/null
+++ b/passes/pmgen/xilinx_dsp_cascade.pmg
@@ -0,0 +1,427 @@
+// This file describes the third of three pattern matcher setups that
+// form the `xilinx_dsp` pass described in xilinx_dsp.cc
+// At a high level, it works as follows:
+// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
+// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
+// use the 'PCOUT' port
+// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
+// (b) has its Z multiplexer output set to the 'C' port, which is
+// driven by the 'P' output of the previous DSP cell, and (c) has its
+// 'PCIN' port unused
+// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
+// previous DSP cell right-shifted by 17 bits
+// (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
+// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
+// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
+// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
+// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
+// opportunity exists by matching for a $dff-with-optional-clock-enable-
+// or-reset and checking that the 'D' input of this register is the same
+// as the 'A' input of the previous DSP
+// (4) Same as (3) but for BCOUT -> BCIN cascade
+// (5) Recursively go to (2.1) until no more matches possible, keeping track
+// of the longest possible chain found
+// (6) The longest chain is then divided into chunks of no more than
+// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
+// height of a DSP column) with each DSP in each chunk being rewritten
+// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
+// Notes:
+// - Currently, [AB]COUT -> [AB]CIN cascades (3 or 4) are only considered
+// if a PCOUT -> PCIN cascade (2.1 or 2.2) is first identified; this need
+// not be the case --- [AB] cascades can exist independently of a P cascade
+// (though all three cascades must come from the same DSP). This situation
+// is not handled currently.
+// - In addition, [AB]COUT -> [AB]CIN cascades (3 or 4) are currently
+// conservative in that they examine the situation where (a) the previous
+// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no
+// registers enabled, and (c) that there exists only one additional register
+// between the upstream and downstream DSPs. This can certainly be relaxed
+// to identify situations ranging from (i) neither DSP uses any registers,
+// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and
+// there exists a further 2 registers between them. This remains a TODO
+// item.
+
+pattern xilinx_dsp_cascade
+
+udata <std::function<SigSpec(const SigSpec&)>> unextend
+udata <vector<std::tuple<Cell*,int,int,int>>> chain longest_chain
+state <Cell*> next
+state <SigSpec> clock
+state <int> AREG BREG
+
+// Variables used for subpatterns
+state <SigSpec> argQ argD
+state <bool> ffcepol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffcemux dffrstmux
+udata <bool> dffcepol dffrstpol
+
+code
+#define MAX_DSP_CASCADE 20
+endcode
+
+// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
+// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
+// use the 'PCOUT' port
+match first
+ select first->type.in(\DSP48E1)
+ select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")
+ select nusers(port(first, \PCOUT, SigSpec())) <= 1
+endmatch
+
+// (6) The longest chain is then divided into chunks of no more than
+// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
+// height of a DSP column) with each DSP in each chunk being rewritten
+// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
+code
+ longest_chain.clear();
+ chain.emplace_back(first, -1, -1, -1);
+ subpattern(tail);
+finally
+ chain.pop_back();
+ log_assert(chain.empty());
+ if (GetSize(longest_chain) > 1) {
+ Cell *dsp = std::get<0>(longest_chain.front());
+
+ Cell *dsp_pcin;
+ int P, AREG, BREG;
+ for (int i = 1; i < GetSize(longest_chain); i++) {
+ std::tie(dsp_pcin,P,AREG,BREG) = longest_chain[i];
+
+ if (i % MAX_DSP_CASCADE > 0) {
+ if (P >= 0) {
+ Wire *cascade = module->addWire(NEW_ID, 48);
+ dsp_pcin->setPort(ID(C), Const(0, 48));
+ dsp_pcin->setPort(ID(PCIN), cascade);
+ dsp->setPort(ID(PCOUT), cascade);
+ add_siguser(cascade, dsp_pcin);
+ add_siguser(cascade, dsp);
+
+ SigSpec opmode = port(dsp_pcin, \OPMODE, Const(0, 7));
+ if (P == 17)
+ opmode[6] = State::S1;
+ else if (P == 0)
+ opmode[6] = State::S0;
+ else log_abort();
+
+ opmode[5] = State::S0;
+ opmode[4] = State::S1;
+ dsp_pcin->setPort(\OPMODE, opmode);
+
+ log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
+ }
+ if (AREG >= 0) {
+ Wire *cascade = module->addWire(NEW_ID, 30);
+ dsp_pcin->setPort(ID(A), Const(0, 30));
+ dsp_pcin->setPort(ID(ACIN), cascade);
+ dsp->setPort(ID(ACOUT), cascade);
+ add_siguser(cascade, dsp_pcin);
+ add_siguser(cascade, dsp);
+
+ dsp->setParam(ID(ACASCREG), AREG);
+ dsp_pcin->setParam(ID(A_INPUT), Const("CASCADE"));
+
+ log_debug("ACOUT -> ACIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
+ }
+ if (BREG >= 0) {
+ Wire *cascade = module->addWire(NEW_ID, 18);
+ dsp_pcin->setPort(ID(B), Const(0, 18));
+ dsp_pcin->setPort(ID(BCIN), cascade);
+ dsp->setPort(ID(BCOUT), cascade);
+ add_siguser(cascade, dsp_pcin);
+ add_siguser(cascade, dsp);
+
+ dsp->setParam(ID(BCASCREG), BREG);
+ dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE"));
+
+ log_debug("BCOUT -> BCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
+ }
+ }
+ else {
+ log_debug(" Blocking %s -> %s cascade (exceeds max: %d)\n", log_id(dsp), log_id(dsp_pcin), MAX_DSP_CASCADE);
+ }
+
+ dsp = dsp_pcin;
+ }
+
+ accept;
+ }
+endcode
+
+// ------------------------------------------------------------------
+
+subpattern tail
+arg first
+arg next
+
+// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
+// (b) has its Z multiplexer output set to the 'C' port, which is
+// driven by the 'P' output of the previous DSP cell, and (c) has its
+// 'PCIN' port unused
+match nextP
+ select nextP->type.in(\DSP48E1)
+ select !param(nextP, \CREG, State::S1).as_bool()
+ select port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")
+ select nusers(port(nextP, \C, SigSpec())) > 1
+ select nusers(port(nextP, \PCIN, SigSpec())) == 0
+ index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0]
+ semioptional
+endmatch
+
+// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
+// previous DSP cell right-shifted by 17 bits
+match nextP_shift17
+ if !nextP
+ select nextP_shift17->type.in(\DSP48E1)
+ select !param(nextP_shift17, \CREG, State::S1).as_bool()
+ select port(nextP_shift17, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")
+ select nusers(port(nextP_shift17, \C, SigSpec())) > 1
+ select nusers(port(nextP_shift17, \PCIN, SigSpec())) == 0
+ index <SigBit> port(nextP_shift17, \C)[0] === port(std::get<0>(chain.back()), \P)[17]
+ semioptional
+endmatch
+
+code next
+ next = nextP;
+ if (!nextP)
+ next = nextP_shift17;
+ if (next) {
+ unextend = [](const SigSpec &sig) {
+ int i;
+ for (i = GetSize(sig)-1; i > 0; i--)
+ if (sig[i] != sig[i-1])
+ break;
+ // Do not remove non-const sign bit
+ if (sig[i].wire)
+ ++i;
+ return sig.extract(0, i);
+ };
+ }
+endcode
+
+// (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
+// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
+// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
+// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
+// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
+// opportunity exists by matching for a $dff-with-optional-clock-enable-
+// or-reset and checking that the 'D' input of this register is the same
+// as the 'A' input of the previous DSP
+code argQ clock AREG
+ AREG = -1;
+ if (next) {
+ Cell *prev = std::get<0>(chain.back());
+ if (param(prev, \AREG, 2).as_int() > 0 &&
+ param(next, \AREG, 2).as_int() > 0 &&
+ param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&
+ nusers(port(prev, \ACOUT, SigSpec())) <= 1) {
+ argQ = unextend(port(next, \A));
+ clock = port(prev, \CLK);
+ subpattern(in_dffe);
+ if (dff) {
+ if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0)
+ goto reject_AREG;
+ if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0))
+ goto reject_AREG;
+ if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0)
+ goto reject_AREG;
+ if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0))
+ goto reject_AREG;
+ if (dffD == unextend(port(prev, \A)))
+ AREG = 1;
+reject_AREG: ;
+ }
+ }
+ }
+endcode
+
+// (4) Same as (3) but for BCOUT -> BCIN cascade
+code argQ clock BREG
+ BREG = -1;
+ if (next) {
+ Cell *prev = std::get<0>(chain.back());
+ if (param(prev, \BREG, 2).as_int() > 0 &&
+ param(next, \BREG, 2).as_int() > 0 &&
+ param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&
+ port(next, \BCIN, SigSpec()).is_fully_zero() &&
+ nusers(port(prev, \BCOUT, SigSpec())) <= 1) {
+ argQ = unextend(port(next, \B));
+ clock = port(prev, \CLK);
+ subpattern(in_dffe);
+ if (dff) {
+ if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0)
+ goto reject_BREG;
+ if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0))
+ goto reject_BREG;
+ if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0)
+ goto reject_BREG;
+ if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0))
+ goto reject_BREG;
+ if (dffD == unextend(port(prev, \B)))
+ BREG = 1;
+reject_BREG: ;
+ }
+ }
+ }
+endcode
+
+// (5) Recursively go to (2.1) until no more matches possible, recording the
+// longest possible chain
+code
+ if (next) {
+ chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);
+
+ SigSpec sigC = unextend(port(next, \C));
+
+ if (nextP_shift17) {
+ if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&
+ port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC)
+ subpattern(tail);
+ }
+ else {
+ if (GetSize(sigC) <= GetSize(port(std::get<0>(chain.back()), \P)) &&
+ port(std::get<0>(chain.back()), \P).extract(0, GetSize(sigC)) != sigC)
+ subpattern(tail);
+
+ }
+ } else {
+ if (GetSize(chain) > GetSize(longest_chain))
+ longest_chain = chain;
+ }
+finally
+ if (next)
+ chain.pop_back();
+endcode
+
+// #######################
+
+// Subpattern for matching against input registers, based on knowledge of the
+// 'Q' input. Typically, identifying registers with clock-enable and reset
+// capability would be a task handled by other Yosys passes such as
+// dff2dffe, but since DSP inference happens much before this, these patterns
+// have to be manually identified.
+// At a high level:
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// one that exclusively drives the 'D' input of the $dff, with one of its
+// $mux inputs being fully zero
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+subpattern in_dffe
+arg argD argQ clock
+
+code
+ dff = nullptr;
+ for (const auto &c : argQ.chunks()) {
+ // Abandon matches when 'Q' is a constant
+ if (!c.wire)
+ reject;
+ // Abandon matches when 'Q' has the keep attribute set
+ if (c.wire->get_bool_attribute(\keep))
+ reject;
+ // Abandon matches when 'Q' has a non-zero init attribute set
+ // (not supported by DSP48E1)
+ Const init = c.wire->attributes.at(\init, Const());
+ for (auto b : init.extract(c.offset, c.width))
+ if (b != State::Sx && b != State::S0)
+ reject;
+ }
+endcode
+
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+// 'Q' argument
+match ff
+ select ff->type.in($dff)
+ // DSP48E1 does not support clock inversion
+ select param(ff, \CLK_POLARITY).as_bool()
+
+ slice offset GetSize(port(ff, \D))
+ index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+ // Check that the rest of argQ is present
+ filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+ filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+ filter clock == SigBit() || port(ff, \CLK) == clock
+
+ set ffoffset offset
+endmatch
+
+code argQ argD
+ SigSpec Q = port(ff, \Q);
+ dff = ff;
+ dffclock = port(ff, \CLK);
+ dffD = argQ;
+ argD = port(ff, \D);
+ argQ = Q;
+ dffD.replace(argQ, argD);
+ // Only search for ffrstmux if dffD only
+ // has two (ff, ffrstmux) users
+ if (nusers(dffD) > 2)
+ argD = SigSpec();
+endcode
+
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+// exclusively drives the 'D' input of the $dff, with one of the $mux
+// inputs being fully zero
+match ffrstmux
+ if !argD.empty()
+ select ffrstmux->type.in($mux)
+ index <SigSpec> port(ffrstmux, \Y) === argD
+
+ choice <IdString> BA {\B, \A}
+ // DSP48E1 only supports reset to zero
+ select port(ffrstmux, BA).is_fully_zero()
+
+ define <bool> pol (BA == \B)
+ set ffrstpol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffrstmux) {
+ dffrstmux = ffrstmux;
+ dffrstpol = ffrstpol;
+ argD = port(ffrstmux, ffrstpol ? \A : \B);
+ dffD.replace(port(ffrstmux, \Y), argD);
+
+ // Only search for ffcemux if argQ has at
+ // least 3 users (ff, <upstream>, ffrstmux) and
+ // dffD only has two (ff, ffrstmux)
+ if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+ argD = SigSpec();
+ }
+ else
+ dffrstmux = nullptr;
+endcode
+
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+// exclusively drives the 'D' input of the $dff (or the other input of
+// the reset $mux) and where one of this $mux's inputs is connected to
+// the 'Q' output of the $dff
+match ffcemux
+ if !argD.empty()
+ select ffcemux->type.in($mux)
+ index <SigSpec> port(ffcemux, \Y) === argD
+ choice <IdString> AB {\A, \B}
+ index <SigSpec> port(ffcemux, AB) === argQ
+ define <bool> pol (AB == \A)
+ set ffcepol pol
+ semioptional
+endmatch
+
+code argD
+ if (ffcemux) {
+ dffcemux = ffcemux;
+ dffcepol = ffcepol;
+ argD = port(ffcemux, ffcepol ? \B : \A);
+ dffD.replace(port(ffcemux, \Y), argD);
+ }
+ else
+ dffcemux = nullptr;
+endcode
diff --git a/passes/pmgen/xilinx_srl.pmg b/passes/pmgen/xilinx_srl.pmg
index b18119b87..535b3dfdc 100644
--- a/passes/pmgen/xilinx_srl.pmg
+++ b/passes/pmgen/xilinx_srl.pmg
@@ -13,9 +13,9 @@ endcode
match first
select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !first->has_keep_attr()
- select !first->type.in(\FDRE) || !first->parameters.at(\IS_R_INVERTED, State::S0).as_bool()
- select !first->type.in(\FDRE) || !first->parameters.at(\IS_D_INVERTED, State::S0).as_bool()
- select !first->type.in(\FDRE, \FDRE_1) || first->connections_.at(\R, State::S0).is_fully_zero()
+ select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED, State::S0).as_bool()
+ select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED, State::S0).as_bool()
+ select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero()
filter !non_first_cells.count(first)
generate
SigSpec C = module->addWire(NEW_ID);
@@ -84,9 +84,9 @@ arg en_port
match first
select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !first->has_keep_attr()
- select !first->type.in(\FDRE) || !first->parameters.at(\IS_R_INVERTED, State::S0).as_bool()
- select !first->type.in(\FDRE) || !first->parameters.at(\IS_D_INVERTED, State::S0).as_bool()
- select !first->type.in(\FDRE, \FDRE_1) || first->connections_.at(\R, State::S0).is_fully_zero()
+ select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED, State::S0).as_bool()
+ select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED, State::S0).as_bool()
+ select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero()
endmatch
code clk_port en_port
@@ -111,10 +111,10 @@ match next
index <SigBit> port(next, \Q) === port(first, \D)
filter port(next, clk_port) == port(first, clk_port)
filter en_port == IdString() || port(next, en_port) == port(first, en_port)
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_C_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_C_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_D_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_D_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_R_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_R_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE, \FDRE_1) || next->connections_.at(\R, State::S0).is_fully_zero()
+ filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED, State::S0).as_bool() == param(first, \IS_C_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED, State::S0).as_bool() == param(first, \IS_D_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED, State::S0).as_bool() == param(first, \IS_R_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero()
endmatch
code
@@ -138,10 +138,10 @@ match next
index <SigBit> port(next, \Q) === port(chain.back(), \D)
filter port(next, clk_port) == port(first, clk_port)
filter en_port == IdString() || port(next, en_port) == port(first, en_port)
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_C_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_C_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_D_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_D_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE) || next->parameters.at(\IS_R_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_R_INVERTED, State::S0).as_bool()
- filter !first->type.in(\FDRE, \FDRE_1) || next->connections_.at(\R, State::S0).is_fully_zero()
+ filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED, State::S0).as_bool() == param(first, \IS_C_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED, State::S0).as_bool() == param(first, \IS_D_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED, State::S0).as_bool() == param(first, \IS_R_INVERTED, State::S0).as_bool()
+ filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero()
generate
Cell *cell = module->addCell(NEW_ID, chain.back()->type);
cell->setPort(\C, chain.back()->getPort(\C));
@@ -149,7 +149,7 @@ generate
cell->setPort(\Q, chain.back()->getPort(\D));
if (cell->type == \FDRE) {
if (rng(2) == 0)
- cell->setPort(\R, chain.back()->connections_.at(\R, State::S0));
+ cell->setPort(\R, port(chain.back(), \R, State::S0));
cell->setPort(\CE, chain.back()->getPort(\CE));
}
else if (cell->type.begins_with("$_DFFE_"))