diff options
Diffstat (limited to 'passes/pmgen')
-rw-r--r-- | passes/pmgen/README.md | 2 | ||||
-rw-r--r-- | passes/pmgen/generate.h | 140 | ||||
-rw-r--r-- | passes/pmgen/ice40_wrapcarry.pmg | 4 | ||||
-rw-r--r-- | passes/pmgen/peepopt.cc | 76 | ||||
-rw-r--r-- | passes/pmgen/peepopt_dffmux.pmg | 158 | ||||
-rw-r--r-- | passes/pmgen/pmgen.py | 16 | ||||
-rw-r--r-- | passes/pmgen/test_pmgen.cc | 129 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp.cc | 15 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp.pmg | 218 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp_CREG.pmg | 81 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp_cascade.pmg | 115 |
11 files changed, 699 insertions, 255 deletions
diff --git a/passes/pmgen/README.md b/passes/pmgen/README.md index 2f5b8d0b2..39560839f 100644 --- a/passes/pmgen/README.md +++ b/passes/pmgen/README.md @@ -190,7 +190,7 @@ create matches for different sections of a cell. For example: select pmux->type == $pmux slice idx GetSize(port(pmux, \S)) index <SigBit> port(pmux, \S)[idx] === port(eq, \Y) - set pmux_slice idx + set pmux_slice idx endmatch The first argument to `slice` is the local variable name used to identify the diff --git a/passes/pmgen/generate.h b/passes/pmgen/generate.h new file mode 100644 index 000000000..354583de5 --- /dev/null +++ b/passes/pmgen/generate.h @@ -0,0 +1,140 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ + +#ifndef PMGEN_GENERATE +#define PMGEN_GENERATE + +#define GENERATE_PATTERN(pmclass, pattern) \ + generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) + +void pmtest_addports(Module *module) +{ + pool<SigBit> driven_bits, used_bits; + SigMap sigmap(module); + int icnt = 0, ocnt = 0; + + for (auto cell : module->cells()) + for (auto conn : cell->connections()) + { + if (cell->input(conn.first)) + for (auto bit : sigmap(conn.second)) + used_bits.insert(bit); + if (cell->output(conn.first)) + for (auto bit : sigmap(conn.second)) + driven_bits.insert(bit); + } + + for (auto wire : vector<Wire*>(module->wires())) + { + SigSpec ibits, obits; + for (auto bit : sigmap(wire)) { + if (!used_bits.count(bit)) + obits.append(bit); + if (!driven_bits.count(bit)) + ibits.append(bit); + } + if (!ibits.empty()) { + Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); + w->port_input = true; + module->connect(ibits, w); + } + if (!obits.empty()) { + Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); + w->port_output = true; + module->connect(w, obits); + } + } + + module->fixup_ports(); +} + +template <class pm> +void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design) +{ + log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); + + int modcnt = 0; + int maxmodcnt = 100; + int maxsubcnt = 4; + int timeout = 0; + vector<Module*> mods; + + while (modcnt < maxmodcnt) + { + int submodcnt = 0, itercnt = 0, cellcnt = 0; + Module *mod = design->addModule(NEW_ID); + + while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) + { + if (timeout++ > 10000) + log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); + + pm matcher(mod, mod->cells()); + + matcher.rng(1); + matcher.rngseed += modcnt; + matcher.rng(1); + 
matcher.rngseed += submodcnt; + matcher.rng(1); + matcher.rngseed += itercnt; + matcher.rng(1); + matcher.rngseed += cellcnt; + matcher.rng(1); + + if (GetSize(mod->cells()) != cellcnt) + { + bool found_match = false; + run(matcher, [&](){ found_match = true; }); + cellcnt = GetSize(mod->cells()); + + if (found_match) { + Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", + pmclass, pattern, modcnt++)); + log("Creating module %s with %d cells.\n", log_id(m), cellcnt); + mod->cloneInto(m); + pmtest_addports(m); + mods.push_back(m); + submodcnt++; + timeout = 0; + } + } + + matcher.generate_mode = true; + run(matcher, [](){}); + } + + if (submodcnt && maxsubcnt < (1 << 16)) + maxsubcnt *= 2; + + design->remove(mod); + } + + Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); + log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); + for (auto mod : mods) { + Cell *c = m->addCell(mod->name, mod->name); + for (auto port : mod->ports) { + Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); + c->setPort(port, w); + } + } + pmtest_addports(m); +} + +#endif diff --git a/passes/pmgen/ice40_wrapcarry.pmg b/passes/pmgen/ice40_wrapcarry.pmg index 9e64c7467..bb59edb0c 100644 --- a/passes/pmgen/ice40_wrapcarry.pmg +++ b/passes/pmgen/ice40_wrapcarry.pmg @@ -9,3 +9,7 @@ match lut index <SigSpec> port(lut, \I1) === port(carry, \I0) index <SigSpec> port(lut, \I2) === port(carry, \I1) endmatch + +code + accept; +endcode diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index 72b02127a..2230145df 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -24,8 +24,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN bool did_something; +dict<SigBit, State> initbits; +pool<SigBit> rminitbits; #include "passes/pmgen/peepopt_pm.h" +#include "generate.h" struct PeepoptPass : public Pass { PeepoptPass() : Pass("peepopt", "collection of peephole optimizers") { } @@ -40,27 +43,86 @@ struct PeepoptPass : public Pass { 
} void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { + std::string genmode; + log_header(design, "Executing PEEPOPT pass (run peephole optimizers).\n"); size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-singleton") { - // singleton_mode = true; - // continue; - // } + if (args[argidx] == "-generate" && argidx+1 < args.size()) { + genmode = args[++argidx]; + continue; + } break; } extra_args(args, argidx, design); - for (auto module : design->selected_modules()) { + if (!genmode.empty()) + { + initbits.clear(); + rminitbits.clear(); + + if (genmode == "shiftmul") + GENERATE_PATTERN(peepopt_pm, shiftmul); + else if (genmode == "muldiv") + GENERATE_PATTERN(peepopt_pm, muldiv); + else if (genmode == "dffmux") + GENERATE_PATTERN(peepopt_pm, dffmux); + else + log_abort(); + return; + } + + for (auto module : design->selected_modules()) + { did_something = true; - while (did_something) { + + while (did_something) + { did_something = false; - peepopt_pm pm(module, module->selected_cells()); + initbits.clear(); + rminitbits.clear(); + + peepopt_pm pm(module); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (sig[i].wire == nullptr) + continue; + if (val[i] != State::S0 && val[i] != State::S1) + continue; + initbits[sig[i]] = val[i]; + } + } + } + + pm.setup(module->selected_cells()); + pm.run_shiftmul(); pm.run_muldiv(); pm.run_dffmux(); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const &val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (rminitbits.count(sig[i])) + val[i] = State::Sx; + } + } + } + + initbits.clear(); + rminitbits.clear(); 
} } } diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index c88a52226..0069b0570 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -8,21 +8,23 @@ match dff select GetSize(port(dff, \D)) > 1 endmatch +code sigD + sigD = port(dff, \D); +endcode + match rstmux select rstmux->type == $mux select GetSize(port(rstmux, \Y)) > 1 - index <SigSpec> port(rstmux, \Y) === port(dff, \D) + index <SigSpec> port(rstmux, \Y) === sigD choice <IdString> BA {\B, \A} select port(rstmux, BA).is_fully_const() set rstmuxBA BA - optional + semioptional endmatch code sigD if (rstmux) sigD = port(rstmux, rstmuxBA == \B ? \A : \B); - else - sigD = port(dff, \D); endcode match cemux @@ -32,67 +34,111 @@ match cemux choice <IdString> AB {\A, \B} index <SigSpec> port(cemux, AB) === port(dff, \Q) set cemuxAB AB + semioptional endmatch code - SigSpec D = port(cemux, cemuxAB == \A ? \B : \A); - SigSpec Q = port(dff, \Q); + if (!cemux && !rstmux) + reject; +endcode + +code Const rst; - if (rstmux) + SigSpec D; + if (cemux) { + D = port(cemux, cemuxAB == \A ? \B : \A); + if (rstmux) + rst = port(rstmux, rstmuxBA).as_const(); + else + rst = Const(State::Sx, GetSize(D)); + } + else { + log_assert(rstmux); + D = port(rstmux, rstmuxBA == \B ? \A : \B); rst = port(rstmux, rstmuxBA).as_const(); + } + SigSpec Q = port(dff, \Q); int width = GetSize(D); - SigSpec &ceA = cemux->connections_.at(\A); - SigSpec &ceB = cemux->connections_.at(\B); - SigSpec &ceY = cemux->connections_.at(\Y); - SigSpec &dffD = dff->connections_.at(\D); - SigSpec &dffQ = dff->connections_.at(\Q); + SigSpec dffD = dff->getPort(\D); + SigSpec dffQ = dff->getPort(\Q); - if (D[width-1] == D[width-2]) { - did_something = true; + Const initval; + for (auto b : Q) { + auto it = initbits.find(b); + initval.bits.push_back(it == initbits.end() ? 
State::Sx : it->second); + } - SigBit sign = D[width-1]; - bool is_signed = sign.wire; - int i; - for (i = width-1; i >= 2; i--) { - if (!is_signed) { - module->connect(Q[i], sign); - if (D[i-1] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } - else { - module->connect(Q[i], Q[i-1]); - if (D[i-2] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } - } + auto cmpx = [=](State lhs, State rhs) { + if (lhs == State::Sx || rhs == State::Sx) + return true; + return lhs == rhs; + }; - ceA.remove(i, width-i); - ceB.remove(i, width-i); - ceY.remove(i, width-i); - cemux->fixup_parameters(); - dffD.remove(i, width-i); - dffQ.remove(i, width-i); + int i = width-1; + while (i > 1) { + if (D[i] != D[i-1]) + break; + if (!cmpx(rst[i], rst[i-1])) + break; + if (!cmpx(initval[i], initval[i-1])) + break; + if (!cmpx(rst[i], initval[i])) + break; + rminitbits.insert(Q[i]); + module->connect(Q[i], Q[i-1]); + i--; + } + if (i < width-1) { + did_something = true; + if (cemux) { + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); + ceA.remove(i, width-1-i); + ceB.remove(i, width-1-i); + ceY.remove(i, width-1-i); + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); + cemux->fixup_parameters(); + blacklist(cemux); + } + if (rstmux) { + SigSpec rstA = rstmux->getPort(\A); + SigSpec rstB = rstmux->getPort(\B); + SigSpec rstY = rstmux->getPort(\Y); + rstA.remove(i, width-1-i); + rstB.remove(i, width-1-i); + rstY.remove(i, width-1-i); + rstmux->setPort(\A, rstA); + rstmux->setPort(\B, rstB); + rstmux->setPort(\Y, rstY); + rstmux->fixup_parameters(); + blacklist(rstmux); + } + dffD.remove(i, width-1-i); + dffQ.remove(i, width-1-i); + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); + blacklist(dff); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux), width-i); - accept; + log("dffcemux 
pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i); + width = i+1; } - else { + if (cemux) { + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); + int count = 0; for (int i = width-1; i >= 0; i--) { if (D[i].wire) continue; - Wire *w = Q[i].wire; - auto it = w->attributes.find(\init); - State init; - if (it != w->attributes.end()) - init = it->second[Q[i].offset]; - else - init = State::Sx; - - if (init == State::Sx || init == D[i].data) { + if (cmpx(rst[i], D[i].data) && cmpx(initval[i], D[i].data)) { count++; + rminitbits.insert(Q[i]); module->connect(Q[i], D[i]); ceA.remove(i); ceB.remove(i); @@ -101,13 +147,25 @@ code dffQ.remove(i); } } - if (count > 0) { + if (count > 0) + { did_something = true; + + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); cemux->fixup_parameters(); + blacklist(cemux); + + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), count); + blacklist(dff); + + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count); } + } + if (did_something) accept; - } endcode diff --git a/passes/pmgen/pmgen.py b/passes/pmgen/pmgen.py index 39a09991d..df0ffaff2 100644 --- a/passes/pmgen/pmgen.py +++ b/passes/pmgen/pmgen.py @@ -362,6 +362,7 @@ with open(outfile, "w") as f: print(" Module *module;", file=f) print(" SigMap sigmap;", file=f) print(" std::function<void()> on_accept;", file=f) + print(" bool setup_done;", file=f) print(" bool generate_mode;", file=f) print(" int accept_cnt;", file=f) print("", file=f) @@ -477,7 +478,17 @@ with open(outfile, "w") as f: print("", file=f) print(" {}_pm(Module *module, const 
vector<Cell*> &cells) :".format(prefix), file=f) - print(" module(module), sigmap(module), generate_mode(false), rngseed(12345678) {", file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" setup(cells);", file=f) + print(" }", file=f) + print("", file=f) + + print(" {}_pm(Module *module) :".format(prefix), file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" }", file=f) + print("", file=f) + + print(" void setup(const vector<Cell*> &cells) {", file=f) for current_pattern in sorted(patterns.keys()): for s, t in sorted(udata_types[current_pattern].items()): if t.endswith("*"): @@ -485,6 +496,8 @@ with open(outfile, "w") as f: else: print(" ud_{}.{} = {}();".format(current_pattern, s, t), file=f) current_pattern = None + print(" log_assert(!setup_done);", file=f) + print(" setup_done = true;", file=f) print(" for (auto port : module->ports)", file=f) print(" add_siguser(module->wire(port), nullptr);", file=f) print(" for (auto cell : module->cells())", file=f) @@ -539,6 +552,7 @@ with open(outfile, "w") as f: for current_pattern in sorted(patterns.keys()): print(" int run_{}(std::function<void()> on_accept_f) {{".format(current_pattern), file=f) + print(" log_assert(setup_done);", file=f) print(" accept_cnt = 0;", file=f) print(" on_accept = on_accept_f;", file=f) print(" rollback = 0;", file=f) diff --git a/passes/pmgen/test_pmgen.cc b/passes/pmgen/test_pmgen.cc index 4f3eec935..72dc18dcc 100644 --- a/passes/pmgen/test_pmgen.cc +++ b/passes/pmgen/test_pmgen.cc @@ -23,13 +23,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -// for peepopt_pm -bool did_something; - #include "passes/pmgen/test_pmgen_pm.h" #include "passes/pmgen/ice40_dsp_pm.h" #include "passes/pmgen/xilinx_srl_pm.h" -#include "passes/pmgen/peepopt_pm.h" + +#include "generate.h" void reduce_chain(test_pmgen_pm &pm) { @@ -118,123 +116,6 @@ void 
opt_eqpmux(test_pmgen_pm &pm) log(" -> %s (%s)\n", log_id(c), log_id(c->type)); } -#define GENERATE_PATTERN(pmclass, pattern) \ - generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) - -void pmtest_addports(Module *module) -{ - pool<SigBit> driven_bits, used_bits; - SigMap sigmap(module); - int icnt = 0, ocnt = 0; - - for (auto cell : module->cells()) - for (auto conn : cell->connections()) - { - if (cell->input(conn.first)) - for (auto bit : sigmap(conn.second)) - used_bits.insert(bit); - if (cell->output(conn.first)) - for (auto bit : sigmap(conn.second)) - driven_bits.insert(bit); - } - - for (auto wire : vector<Wire*>(module->wires())) - { - SigSpec ibits, obits; - for (auto bit : sigmap(wire)) { - if (!used_bits.count(bit)) - obits.append(bit); - if (!driven_bits.count(bit)) - ibits.append(bit); - } - if (!ibits.empty()) { - Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); - w->port_input = true; - module->connect(ibits, w); - } - if (!obits.empty()) { - Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); - w->port_output = true; - module->connect(w, obits); - } - } - - module->fixup_ports(); -} - -template <class pm> -void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design) -{ - log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); - - int modcnt = 0; - int maxmodcnt = 100; - int maxsubcnt = 4; - int timeout = 0; - vector<Module*> mods; - - while (modcnt < maxmodcnt) - { - int submodcnt = 0, itercnt = 0, cellcnt = 0; - Module *mod = design->addModule(NEW_ID); - - while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) - { - if (timeout++ > 10000) - log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); - - pm matcher(mod, mod->cells()); - - matcher.rng(1); - matcher.rngseed += modcnt; - 
matcher.rng(1); - matcher.rngseed += submodcnt; - matcher.rng(1); - matcher.rngseed += itercnt; - matcher.rng(1); - matcher.rngseed += cellcnt; - matcher.rng(1); - - if (GetSize(mod->cells()) != cellcnt) - { - bool found_match = false; - run(matcher, [&](){ found_match = true; }); - cellcnt = GetSize(mod->cells()); - - if (found_match) { - Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", - pmclass, pattern, modcnt++)); - log("Creating module %s with %d cells.\n", log_id(m), cellcnt); - mod->cloneInto(m); - pmtest_addports(m); - mods.push_back(m); - submodcnt++; - timeout = 0; - } - } - - matcher.generate_mode = true; - run(matcher, [](){}); - } - - if (submodcnt && maxsubcnt < (1 << 16)) - maxsubcnt *= 2; - - design->remove(mod); - } - - Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); - log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); - for (auto mod : mods) { - Cell *c = m->addCell(mod->name, mod->name); - for (auto port : mod->ports) { - Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); - c->setPort(port, w); - } - } - pmtest_addports(m); -} - struct TestPmgenPass : public Pass { TestPmgenPass() : Pass("test_pmgen", "test pass for pmgen") { } void help() YS_OVERRIDE @@ -355,12 +236,6 @@ struct TestPmgenPass : public Pass { if (pattern == "xilinx_srl.variable") return GENERATE_PATTERN(xilinx_srl_pm, variable); - if (pattern == "peepopt-muldiv") - return GENERATE_PATTERN(peepopt_pm, muldiv); - - if (pattern == "peepopt-shiftmul") - return GENERATE_PATTERN(peepopt_pm, shiftmul); - log_cmd_error("Unknown pattern: %s\n", pattern.c_str()); } diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 11c7e5ea8..054e123e4 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -20,6 +20,7 @@ #include "kernel/yosys.h" #include "kernel/sigtools.h" +#include <deque> USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN @@ -608,8 +609,13 @@ struct XilinxDspPass : public Pass { 
extra_args(args, argidx, design); for (auto module : design->selected_modules()) { + // Experimental feature: pack $add/$sub cells with + // (* use_dsp48="simd" *) into DSP48E1's using its + // SIMD feature xilinx_simd_pack(module, module->selected_cells()); + // Match for all features ([ABDMP][12]?REG, pre-adder, + // post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); @@ -618,14 +624,17 @@ struct XilinxDspPass : public Pass { // is no guarantee that the cell ordering corresponds // to the "expected" case (i.e. the order in which // they appear in the source) thus the possiblity - // existed that a register got packed as CREG into a + // existed that a register got packed as a CREG into a // downstream DSP that should have otherwise been a - // PREG of an upstream DSP that had not been pattern - // matched yet + // PREG of an upstream DSP that had not been visited + // yet { xilinx_dsp_CREG_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_packC(xilinx_dsp_packC); } + // Lastly, identify and utilise PCOUT -> PCIN, + // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade + // chains { xilinx_dsp_cascade_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_cascade(); diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 3d0b1f2c3..604aa222b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -1,3 +1,57 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// ( 1) Starting from a DSP48E1 cell +// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If ADREG matched, treat 'A' input as input of ADREG +// ( 3) Match the driver of the 'A' 
and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// If A2REG, then match 'A' input for A1REG +// ( 5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached +// to the 'P' output where it is compared to a constant that is a +// power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// be used to implement this function +// Notes: +// - The intention of this pattern matcher is for it to be compatible with +// DSP48E1 cells inferred from multiply operations by Yosys, as well as for +// user instantiations that may already contain the cells being packed... 
+// (though the latter is currently untested) +// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used +// for each *REG match, it has been factored out into two subpatterns: +// in_dffe and out_dffe located at the bottom of this file. +// - Matching for pattern detector features is currently incomplete. For +// example, matching for underflow as well as overflow detection is +// possible, as would auto-reset, enabling saturated arithmetic, detecting +// custom patterns, etc. + pattern xilinx_dsp_pack state <SigBit> clock @@ -5,12 +59,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP state <IdString> postAddAB postAddMuxAB state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol - state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux -// subpattern +// Variables used for subpatterns state <SigSpec> argQ argD state <bool> ffcepol ffrstpol state <int> ffoffset @@ -19,6 +72,7 @@ udata <SigBit> dffclock udata <Cell*> dff dffcemux dffrstmux udata <bool> dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch @@ -50,17 +104,21 @@ code sigA sigB sigC sigD sigM clock sigM.append(P[i]); } log_assert(nusers(P.extract_end(i)) <= 1); + // This sigM could have no users if downstream sinks (e.g. 
$add) is + // narrower than $mul result, for example + if (sigM.empty()) + reject; } else sigM = P; - // This sigM could have no users if downstream $add - // is narrower than $mul result, for example - if (sigM.empty()) - reject; clock = port(dsp, \CLK, SigBit()); endcode +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; @@ -81,6 +139,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock } endcode +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used @@ -106,11 +166,12 @@ code sigA sigD if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); - if (GetSize(sigA) < GetSize(sigD)) - std::swap(sigA, sigD); } endcode +// (4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) @@ -173,6 +234,8 @@ ffA1_end: ; } endcode +// (5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; @@ -222,6 +285,7 @@ ffB1_end: ; } endcode +// (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; @@ -242,6 +306,7 @@ 
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock } endcode +// (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; @@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock sigP = sigM; endcode +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() @@ -277,7 +347,9 @@ match postAdd index <SigBit> port(postAdd, AB)[0] === sigP[0] filter GetSize(port(postAdd, AB)) >= GetSize(sigP) filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP - filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) + // Check that remainder of AB is a sign-extension + define <bool> AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool()) + filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) set postAddAB AB optional endmatch @@ -289,6 +361,7 @@ code sigC sigP } endcode +// (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; @@ -314,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock } endcode +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. 
+// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ match postAddMux if postAdd if ffP @@ -331,6 +417,11 @@ code sigC sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +// the 'P' output where it is compared to a constant that is a power-of-2: +// e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// be used to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" @@ -349,22 +440,45 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. 
+// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -377,14 +491,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -396,9 +508,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// 
exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -430,6 +544,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) @@ -454,16 +572,32 @@ endcode // ####################### +// Subpattern for matching against output registers, based on knowledge of the +// 'D' input. +// At a high level: +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) subpattern out_dffe arg argD argQ clock code dff = nullptr; for (auto c : argD.chunks()) + // Abandon matches when 'D' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; endcode +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs match ffcemux select ffcemux->type.in($mux) // ffcemux output must have two users: ffcemux and ff.D @@ -502,6 +636,10 @@ code argD argQ } endcode +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux 
output) drives one of its two inputs +// and where the other input is fully zero match ffrstmux select ffrstmux->type.in($mux) // ffrstmux output must have two users: ffrstmux and ff.D @@ -540,6 +678,8 @@ code argD argQ } endcode +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -556,32 +696,30 @@ match ff // Check that FF.Q is connected to CE-mux filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ - if (ff) { - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - - SigSpec D = port(ff, \D); - SigSpec Q = port(ff, \Q); - if (!ffcemux) { - argQ = argD; - argQ.replace(D, Q); - } - - for (auto c : argQ.chunks()) { - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; - } + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffcemux) { + argQ = argD; + argQ.replace(D, Q); + } - dff = ff; - dffQ = argQ; - dffclock = port(ff, \CLK); + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } - // No enable/reset mux possible without flop - else if (dffcemux || dffrstmux) - reject; + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a31dc80bf..a57043009 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,3 +1,26 @@ +// This file describes the second of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in 
xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// Notes: +// - Running CREG packing after xilinx_dsp_pack is necessary since there is no +// guarantee that the cell ordering corresponds to the "expected" case (i.e. +// the order in which they appear in the source) thus the possibility existed +// that a register got packed as a CREG into a downstream DSP that should +// have otherwise been a PREG of an upstream DSP that had not been visited +// yet +// - The reason this is separated out from the xilinx_dsp.pmg file is +// for efficiency --- each *.pmg file creates a class of the same basename, +// which when constructed, creates a custom database tailored to the +// pattern(s) contained within. Since the pattern in this file must be +// executed after the pattern contained in xilinx_dsp.pmg, it is necessary +// to reconstruct this database. Separating the two patterns into +// independent files causes two smaller, more specific, databases. 
+ pattern xilinx_dsp_packC udata <std::function<SigSpec(const SigSpec&)>> unextend @@ -6,7 +29,7 @@ state <SigSpec> sigC sigP state <bool> ffCcepol ffCrstpol state <Cell*> ffC ffCcemux ffCrstmux -// subpattern +// Variables used for subpatterns state <SigSpec> argQ argD state <bool> ffcepol ffrstpol state <int> ffoffset @@ -15,13 +38,15 @@ udata <SigBit> dffclock udata <Cell*> dff dffcemux dffrstmux udata <bool> dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port match dsp select dsp->type.in(\DSP48E1) select param(dsp, \CREG, 1).as_int() == 0 select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock +code sigC sigP clock unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -48,11 +73,13 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock else sigP = P; - if (sigC == sigP) - reject; - clock = port(dsp, \CLK, SigBit()); +endcode +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using the in_dffe subpattern) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock argQ = sigC; subpattern(in_dffe); if (dff) { @@ -77,22 +104,44 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. 
+// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -105,14 +154,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -124,9 +171,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 
'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -158,6 +207,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implementing clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 6f4ac5849..7a32df2b7 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -1,3 +1,46 @@ +// This file describes the third of three pattern matcher setups that +// form the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits +// (3) For this subsequent DSP48E1 match (i.e. 
PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP +// (4) Same as (3) but for BCOUT -> BCIN cascade +// (5) Recursively go to (2.1) until no more matches are possible, keeping track +// of the longest possible chain found +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate +// Notes: +// - Currently, [AB]COUT -> [AB]CIN cascades (3 or 4) are only considered +// if a PCOUT -> PCIN cascade (2.1 or 2.2) is first identified; this need +// not be the case --- [AB] cascades can exist independently of a P cascade +// (though all three cascades must come from the same DSP). This situation +// is not handled currently. +// - In addition, [AB]COUT -> [AB]CIN cascades (3 or 4) are currently +// conservative in that they examine the situation where (a) the previous +// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no +// registers enabled, and (c) that there exists only one additional register +// between the upstream and downstream DSPs. This can certainly be relaxed +// to identify situations ranging from (i) neither DSP uses any registers, +// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and +// there exist a further 2 registers between them. This remains a TODO +// item. 
+ pattern xilinx_dsp_cascade udata <std::function<SigSpec(const SigSpec&)>> unextend @@ -6,7 +49,7 @@ state <Cell*> next state <SigSpec> clock state <int> AREG BREG -// subpattern +// Variables used for subpatterns state <SigSpec> argQ argD state <bool> ffcepol ffrstpol state <int> ffoffset @@ -19,12 +62,19 @@ code #define MAX_DSP_CASCADE 20 endcode +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port match first select first->type.in(\DSP48E1) select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") select nusers(port(first, \PCOUT, SigSpec())) <= 1 endmatch +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate code longest_chain.clear(); chain.emplace_back(first, -1, -1, -1); @@ -106,6 +156,10 @@ subpattern tail arg first arg next +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused match nextP select nextP->type.in(\DSP48E1) select !param(nextP, \CREG, State::S1).as_bool() @@ -116,6 +170,8 @@ match nextP semioptional endmatch +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits match nextP_shift17 if !nextP select nextP_shift17->type.in(\DSP48E1) @@ -145,6 +201,14 @@ code next } endcode +// (3) For this subequent DSP48E1 match (i.e. 
PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP code argQ clock AREG AREG = -1; if (next) { @@ -152,7 +216,6 @@ code argQ clock AREG if (param(prev, \AREG, 2).as_int() > 0 && param(next, \AREG, 2).as_int() > 0 && param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && - port(next, \ACIN, SigSpec()).is_fully_zero() && nusers(port(prev, \ACOUT, SigSpec())) <= 1) { argQ = unextend(port(next, \A)); clock = port(prev, \CLK); @@ -174,6 +237,7 @@ reject_AREG: ; } endcode +// (4) Same as (3) but for BCOUT -> BCIN cascade code argQ clock BREG BREG = -1; if (next) { @@ -203,13 +267,14 @@ reject_BREG: ; } endcode +// (5) Recursively go to (2.1) until no more matches are possible, recording the +// longest possible chain code if (next) { chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG); SigSpec sigC = unextend(port(next, \C)); - // TODO: Cannot use 'reject' since semioptional if (nextP_shift17) { if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) && port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) @@ -232,22 +297,44 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. 
+// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -260,14 +347,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -279,9 +364,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 
'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -313,6 +400,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) |