diff options
Diffstat (limited to 'passes')
-rw-r--r-- | passes/pmgen/.gitignore | 2 | ||||
-rw-r--r-- | passes/pmgen/Makefile.inc | 6 | ||||
-rw-r--r-- | passes/pmgen/ice40_dsp.cc | 135 | ||||
-rw-r--r-- | passes/pmgen/ice40_dsp.pmg | 231 | ||||
-rw-r--r-- | passes/pmgen/pmgen.py | 3 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp.cc | 597 | ||||
-rw-r--r-- | passes/pmgen/xilinx_dsp.pmg | 621 | ||||
-rw-r--r-- | passes/tests/test_autotb.cc | 8 |
8 files changed, 1492 insertions, 111 deletions
diff --git a/passes/pmgen/.gitignore b/passes/pmgen/.gitignore index 6b319b8c3..e52f3282f 100644 --- a/passes/pmgen/.gitignore +++ b/passes/pmgen/.gitignore @@ -1 +1 @@ -/*_pm.h
\ No newline at end of file +/*_pm.h diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index 98691d0fe..21f29a49a 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -21,6 +21,12 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h)) # -------------------------------------- +OBJS += passes/pmgen/xilinx_dsp.o +passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h)) + +# -------------------------------------- + OBJS += passes/pmgen/peepopt.o passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h)) diff --git a/passes/pmgen/ice40_dsp.cc b/passes/pmgen/ice40_dsp.cc index 16bfe537f..68fc29f31 100644 --- a/passes/pmgen/ice40_dsp.cc +++ b/passes/pmgen/ice40_dsp.cc @@ -29,15 +29,15 @@ void create_ice40_dsp(ice40_dsp_pm &pm) { auto &st = pm.st_ice40_dsp; -#if 0 +#if 1 log("\n"); - log("ffA: %s\n", log_id(st.ffA, "--")); - log("ffB: %s\n", log_id(st.ffB, "--")); - log("mul: %s\n", log_id(st.mul, "--")); - log("ffY: %s\n", log_id(st.ffY, "--")); - log("addAB: %s\n", log_id(st.addAB, "--")); - log("muxAB: %s\n", log_id(st.muxAB, "--")); - log("ffS: %s\n", log_id(st.ffS, "--")); + log("ffA: %s\n", log_id(st.ffA, "--")); + log("ffB: %s\n", log_id(st.ffB, "--")); + log("mul: %s\n", log_id(st.mul, "--")); + log("ffFJKG: %s\n", log_id(st.ffFJKG, "--")); + log("addAB: %s\n", log_id(st.addAB, "--")); + log("muxAB: %s\n", log_id(st.muxAB, "--")); + log("ffO: %s\n", log_id(st.ffO, "--")); #endif log("Checking %s.%s for iCE40 DSP inference.\n", log_id(pm.module), log_id(st.mul)); @@ -52,42 +52,44 @@ void create_ice40_dsp(ice40_dsp_pm &pm) return; } - if (GetSize(st.sigS) > 32) { - log(" accumulator (%s) is too large (%d > 32).\n", log_signal(st.sigS), GetSize(st.sigS)); + if (GetSize(st.sigO) > 33) { + log(" adder/accumulator (%s) is too large (%d > 33).\n", log_signal(st.sigO), GetSize(st.sigO)); return; } - if (GetSize(st.sigY) > 32) { - log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigY), GetSize(st.sigY)); + if (GetSize(st.sigH) > 32) { + log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigH), GetSize(st.sigH)); return; } - bool mul_signed = st.mul->getParam("\\A_SIGNED").as_bool(); + Cell *cell = st.mul; + if (cell->type == "$mul") { + log(" replacing %s with SB_MAC16 cell.\n", log_id(st.mul->type)); - log(" replacing $mul with SB_MAC16 cell.\n"); - - Cell *cell = pm.module->addCell(NEW_ID, "\\SB_MAC16"); - pm.module->swap_names(cell, st.mul); + cell = pm.module->addCell(NEW_ID, "\\SB_MAC16"); + pm.module->swap_names(cell, st.mul); + } + else log_assert(cell->type == "\\SB_MAC16"); // SB_MAC16 Input Interface - SigSpec A = st.sigA; - A.extend_u0(16, mul_signed); + A.extend_u0(16, st.mul->getParam("\\A_SIGNED").as_bool()); + log_assert(GetSize(A) == 16); SigSpec B = st.sigB; - B.extend_u0(16, mul_signed); + B.extend_u0(16, st.mul->getParam("\\B_SIGNED").as_bool()); + log_assert(GetSize(B) == 16); - SigSpec CD; - if (st.muxA) - CD = st.muxA->getPort("\\B"); - if (st.muxB) - CD = st.muxB->getPort("\\A"); - CD.extend_u0(32, mul_signed); + SigSpec CD = st.sigCD; + if (CD.empty()) + CD = RTLIL::Const(0, 32); + else + log_assert(GetSize(CD) == 32); cell->setPort("\\A", A); cell->setPort("\\B", B); - cell->setPort("\\C", CD.extract(0, 16)); - cell->setPort("\\D", CD.extract(16, 16)); + cell->setPort("\\C", CD.extract(16, 16)); + cell->setPort("\\D", CD.extract(0, 16)); cell->setParam("\\A_REG", st.ffA ? State::S1 : State::S0); cell->setParam("\\B_REG", st.ffB ? State::S1 : State::S0); @@ -100,7 +102,7 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setPort("\\IRSTTOP", State::S0); cell->setPort("\\IRSTBOT", State::S0); - if (st.clock_vld) + if (st.clock != SigBit()) { cell->setPort("\\CLK", st.clock); cell->setPort("\\CE", State::S1); @@ -114,11 +116,11 @@ void create_ice40_dsp(ice40_dsp_pm &pm) if (st.ffB) log(" ffB:%s", log_id(st.ffB)); - if (st.ffY) - log(" ffY:%s", log_id(st.ffY)); + if (st.ffFJKG) + log(" ffFJKG:%s", log_id(st.ffFJKG)); - if (st.ffS) - log(" ffS:%s", log_id(st.ffS)); + if (st.ffO) + log(" ffO:%s", log_id(st.ffO)); log("\n"); } @@ -135,21 +137,43 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setPort("\\SIGNEXTOUT", pm.module->addWire(NEW_ID)); cell->setPort("\\CI", State::Sx); - cell->setPort("\\CO", pm.module->addWire(NEW_ID)); cell->setPort("\\ACCUMCI", State::Sx); cell->setPort("\\ACCUMCO", pm.module->addWire(NEW_ID)); // SB_MAC16 Output Interface - SigSpec O = st.ffS ? st.sigS : st.sigY; + SigSpec O = st.sigO; + int O_width = GetSize(O); + if (O_width == 33) { + log_assert(st.addAB); + // If we have a signed multiply-add, then perform sign extension + // TODO: Need to check CD[31:16] is sign extension of CD[15:0]? + if (st.addAB->getParam("\\A_SIGNED").as_bool() && st.addAB->getParam("\\B_SIGNED").as_bool()) + pm.module->connect(O[32], O[31]); + else + cell->setPort("\\CO", O[32]); + O.remove(O_width-1); + } + else + cell->setPort("\\CO", pm.module->addWire(NEW_ID)); + log_assert(GetSize(O) <= 32); if (GetSize(O) < 32) O.append(pm.module->addWire(NEW_ID, 32-GetSize(O))); cell->setPort("\\O", O); + bool accum = false; if (st.addAB) { - log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); + if (st.addA) + accum = (st.ffO && st.addAB->getPort("\\B") == st.sigO); + else if (st.addB) + accum = (st.ffO && st.addAB->getPort("\\A") == st.sigO); + else log_abort(); + if (accum) + log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); + else + log(" adder %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); cell->setPort("\\ADDSUBTOP", st.addAB->type == "$add" ? State::S0 : State::S1); cell->setPort("\\ADDSUBBOT", st.addAB->type == "$add" ? State::S0 : State::S1); } else { @@ -177,28 +201,43 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setParam("\\C_REG", State::S0); cell->setParam("\\D_REG", State::S0); - cell->setParam("\\TOP_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\BOT_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffY ? State::S1 : State::S0); + cell->setParam("\\TOP_8x8_MULT_REG", st.ffFJKG ? State::S1 : State::S0); + cell->setParam("\\BOT_8x8_MULT_REG", st.ffFJKG ? State::S1 : State::S0); + cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffFJKG ? State::S1 : State::S0); cell->setParam("\\PIPELINE_16x16_MULT_REG2", State::S0); - cell->setParam("\\TOPOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); cell->setParam("\\TOPADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\TOPADDSUB_UPPERINPUT", State::S0); + cell->setParam("\\TOPADDSUB_UPPERINPUT", accum ? State::S0 : State::S1); cell->setParam("\\TOPADDSUB_CARRYSELECT", Const(3, 2)); - cell->setParam("\\BOTOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); cell->setParam("\\BOTADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\BOTADDSUB_UPPERINPUT", State::S0); + cell->setParam("\\BOTADDSUB_UPPERINPUT", accum ? State::S0 : State::S1); cell->setParam("\\BOTADDSUB_CARRYSELECT", Const(0, 2)); cell->setParam("\\MODE_8x8", State::S0); - cell->setParam("\\A_SIGNED", mul_signed ? State::S1 : State::S0); - cell->setParam("\\B_SIGNED", mul_signed ? State::S1 : State::S0); + cell->setParam("\\A_SIGNED", st.mul->getParam("\\A_SIGNED").as_bool()); + cell->setParam("\\B_SIGNED", st.mul->getParam("\\B_SIGNED").as_bool()); + + if (st.ffO) { + if (st.ffO_lo) + cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + else + cell->setParam("\\TOPOUTPUT_SELECT", Const(1, 2)); + + st.ffO->connections_.at("\\Q").replace(O, pm.module->addWire(NEW_ID, GetSize(O))); + cell->setParam("\\BOTOUTPUT_SELECT", Const(1, 2)); + } + else { + cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + cell->setParam("\\BOTOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + } - pm.autoremove(st.mul); - pm.autoremove(st.ffY); - pm.autoremove(st.ffS); + if (cell != st.mul) + pm.autoremove(st.mul); + else + pm.blacklist(st.mul); + pm.autoremove(st.ffFJKG); + pm.autoremove(st.addAB); } struct Ice40DspPass : public Pass { diff --git a/passes/pmgen/ice40_dsp.pmg b/passes/pmgen/ice40_dsp.pmg index 7003092bb..fbf498109 100644 --- a/passes/pmgen/ice40_dsp.pmg +++ b/passes/pmgen/ice40_dsp.pmg @@ -1,87 +1,137 @@ pattern ice40_dsp state <SigBit> clock -state <bool> clock_pol clock_vld -state <SigSpec> sigA sigB sigY sigS +state <bool> clock_pol cd_signed +state <SigSpec> sigA sigB sigCD sigH sigO state <Cell*> addAB muxAB match mul - select mul->type.in($mul) + select mul->type.in($mul, \SB_MAC16) select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10 - select GetSize(mul->getPort(\Y)) > 10 endmatch +code sigA sigB sigH + SigSpec O; + if (mul->type == $mul) + O = mul->getPort(\Y); + else if (mul->type == \SB_MAC16) + O = mul->getPort(\O); + else log_abort(); + if (GetSize(O) <= 10) + reject; + + sigA = port(mul, \A); + int i; + for (i = GetSize(sigA)-1; i > 0; i--) + if (sigA[i] != sigA[i-1]) + break; + // Do not remove non-const sign bit + if (sigA[i].wire) + ++i; + sigA.remove(i, GetSize(sigA)-i); + sigB = port(mul, \B); + for (i = GetSize(sigB)-1; i > 0; i--) + if (sigB[i] != sigB[i-1]) + break; + // Do not remove non-const sign bit + if (sigB[i].wire) + ++i; + sigB.remove(i, GetSize(sigB)-i); + + // Only care about those bits that are used + for (i = 0; i < GetSize(O); i++) { + if (nusers(O[i]) <= 1) + break; + sigH.append(O[i]); + } + log_assert(nusers(O.extract_end(i)) <= 1); +endcode + match ffA + if mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool() select ffA->type.in($dff) - // select nusers(port(ffA, \Q)) == 2 - index <SigSpec> port(ffA, \Q) === port(mul, \A) + filter GetSize(port(ffA, \Q)) >= GetSize(sigA) + slice offset GetSize(port(ffA, \Q)) + filter offset+GetSize(sigA) <= GetSize(port(ffA, \Q)) && port(ffA, \Q).extract(offset, GetSize(sigA)) == sigA optional endmatch -code sigA clock clock_pol clock_vld - sigA = port(mul, \A); - +code sigA clock clock_pol if (ffA) { - sigA = port(ffA, \D); + for (auto b : port(ffA, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; clock = port(ffA, \CLK).as_bit(); clock_pol = param(ffA, \CLK_POLARITY).as_bool(); - clock_vld = true; + + sigA.replace(port(ffA, \Q), port(ffA, \D)); } endcode match ffB + if mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool() select ffB->type.in($dff) - // select nusers(port(ffB, \Q)) == 2 - index <SigSpec> port(ffB, \Q) === port(mul, \B) + filter GetSize(port(ffB, \Q)) >= GetSize(sigB) + slice offset GetSize(port(ffB, \Q)) + filter offset+GetSize(sigB) <= GetSize(port(ffB, \Q)) && port(ffB, \Q).extract(offset, GetSize(sigB)) == sigB optional endmatch -code sigB clock clock_pol clock_vld - sigB = port(mul, \B); - +code sigB clock clock_pol if (ffB) { - sigB = port(ffB, \D); + for (auto b : port(ffB, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; + SigBit c = port(ffB, \CLK).as_bit(); bool cp = param(ffB, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; + + sigB.replace(port(ffB, \Q), port(ffB, \D)); } endcode -match ffY - select ffY->type.in($dff) - select nusers(port(ffY, \D)) == 2 - index <SigSpec> port(ffY, \D) === port(mul, \Y) +match ffFJKG + // Ensure pipeline register is not already used + if mul->type != \SB_MAC16 || (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool()) + select ffFJKG->type.in($dff) + select nusers(port(ffFJKG, \D)) == 2 + index <SigSpec> port(ffFJKG, \D) === sigH optional endmatch -code sigY clock clock_pol clock_vld - sigY = port(mul, \Y); +code sigH sigO clock clock_pol + if (ffFJKG) { + sigH = port(ffFJKG, \Q); + for (auto b : sigH) + if (b.wire->get_bool_attribute(\keep)) + reject; - if (ffY) { - sigY = port(ffY, \Q); - SigBit c = port(ffY, \CLK).as_bit(); - bool cp = param(ffY, \CLK_POLARITY).as_bool(); + SigBit c = port(ffFJKG, \CLK).as_bit(); + bool cp = param(ffFJKG, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; } + + sigO = sigH; endcode match addA select addA->type.in($add) select nusers(port(addA, \A)) == 2 - index <SigSpec> port(addA, \A) === sigY + filter param(addA, \A_WIDTH).as_int() <= GetSize(sigH) + //index <SigSpec> port(addA, \A) === sigH.extract(0, param(addA, \A_WIDTH).as_int()) + filter port(addA, \A) == sigH.extract(0, param(addA, \A_WIDTH).as_int()) optional endmatch @@ -89,75 +139,134 @@ match addB if !addA select addB->type.in($add, $sub) select nusers(port(addB, \B)) == 2 - index <SigSpec> port(addB, \B) === sigY + filter param(addB, \B_WIDTH).as_int() <= GetSize(sigH) + //index <SigSpec> port(addB, \B) === sigH.extract(0, param(addB, \B_WIDTH).as_int()) + filter port(addB, \B) == sigH.extract(0, param(addB, \B_WIDTH).as_int()) optional endmatch -code addAB sigS +code addAB sigCD sigO cd_signed if (addA) { addAB = addA; - sigS = port(addA, \B); + sigCD = port(addAB, \B); + cd_signed = param(addAB, \B_SIGNED).as_bool(); } - if (addB) { + else if (addB) { addAB = addB; - sigS = port(addB, \A); + sigCD = port(addAB, \A); + cd_signed = param(addAB, \A_SIGNED).as_bool(); } if (addAB) { + if (mul->type == \SB_MAC16) { + // Ensure that adder is not used + if (param(mul, \TOPOUTPUT_SELECT).as_int() != 3 || + param(mul, \BOTOUTPUT_SELECT).as_int() != 3) + reject; + } + int natural_mul_width = GetSize(sigA) + GetSize(sigB); - int actual_mul_width = GetSize(sigY); - int actual_acc_width = GetSize(sigS); + int actual_mul_width = GetSize(sigH); + int actual_acc_width = GetSize(sigCD); if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) reject; - if ((actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool())) + // If accumulator, check adder width and signedness + if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool())) reject; + + sigO = port(addAB, \Y); } endcode match muxA - if addAB select muxA->type.in($mux) - select nusers(port(muxA, \A)) == 2 - index <SigSpec> port(muxA, \A) === port(addAB, \Y) + index <int> nusers(port(muxA, \A)) === 2 + index <SigSpec> port(muxA, \A) === sigO optional endmatch match muxB - if addAB if !muxA select muxB->type.in($mux) - select nusers(port(muxB, \B)) == 2 - index <SigSpec> port(muxB, \B) === port(addAB, \Y) + index <int> nusers(port(muxB, \B)) === 2 + index <SigSpec> port(muxB, \B) === sigO optional endmatch -code muxAB - muxAB = addAB; +code muxAB sigO if (muxA) muxAB = muxA; - if (muxB) + else if (muxB) muxAB = muxB; + if (muxAB) + sigO = port(muxAB, \Y); endcode -match ffS - if muxAB - select ffS->type.in($dff) - select nusers(port(ffS, \D)) == 2 - index <SigSpec> port(ffS, \D) === port(muxAB, \Y) - index <SigSpec> port(ffS, \Q) === sigS +match ffO + // Ensure that register is not already used + if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1) + // Ensure that OLOADTOP/OLOADBOT is unused or zero + if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero()) + if nusers(sigO) == 2 + select ffO->type.in($dff) + filter GetSize(port(ffO, \D)) >= GetSize(sigO) + slice offset GetSize(port(ffO, \D)) + filter offset+GetSize(sigO) <= GetSize(port(ffO, \D)) && port(ffO, \D).extract(offset, GetSize(sigO)) == sigO + optional +endmatch + +match ffO_lo + if !ffO && GetSize(sigO) > 16 + // Ensure that register is not already used + if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1) + // Ensure that OLOADTOP/OLOADBOT is unused or zero + if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero()) + if nusers(sigO.extract(0, 16)) == 2 + select ffO_lo->type.in($dff) + filter GetSize(port(ffO_lo, \D)) >= 16 + slice offset GetSize(port(ffO_lo, \D)) + filter offset+GetSize(sigO) <= GetSize(port(ffO_lo, \D)) && port(ffO_lo, \D).extract(offset, 16) == sigO.extract(0, 16) + optional endmatch -code clock clock_pol clock_vld - if (ffS) { - SigBit c = port(ffS, \CLK).as_bit(); - bool cp = param(ffS, \CLK_POLARITY).as_bool(); +code ffO clock clock_pol sigO sigCD cd_signed + if (ffO_lo) { + log_assert(!ffO); + ffO = ffO_lo; + } + if (ffO) { + for (auto b : port(ffO, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; + + SigBit c = port(ffO, \CLK).as_bit(); + bool cp = param(ffO, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; + + sigO.replace(port(ffO, \D), port(ffO, \Q)); + + // Loading value into output register is not + // supported unless using accumulator + if (muxAB) { + if (sigCD != sigO) + reject; + if (muxA) + sigCD = port(muxAB, \B); + else if (muxB) + sigCD = port(muxAB, \A); + else log_abort(); + + cd_signed = addAB && param(addAB, \A_SIGNED).as_bool() && param(addAB, \B_SIGNED).as_bool(); + } } + sigCD.extend_u0(32, cd_signed); +endcode + +code accept; endcode diff --git a/passes/pmgen/pmgen.py b/passes/pmgen/pmgen.py index 573722d68..335d26f16 100644 --- a/passes/pmgen/pmgen.py +++ b/passes/pmgen/pmgen.py @@ -305,7 +305,8 @@ def process_pmgfile(f, filename): block["states"] = set() for s in line.split()[1:]: - assert s in state_types[current_pattern] + if s not in state_types[current_pattern]: + raise RuntimeError("'%s' not in state_types" % s) block["states"].add(s) codetype = "code" diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc new file mode 100644 index 000000000..5af48e4d2 --- /dev/null +++ b/passes/pmgen/xilinx_dsp.cc @@ -0,0 +1,597 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * 2019 Eddie Hung <eddie@fpgeh.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#include "passes/pmgen/xilinx_dsp_pm.h" + +static Cell* addDsp(Module *module) { + Cell *cell = module->addCell(NEW_ID, "\\DSP48E1"); + cell->setParam("\\ACASCREG", 0); + cell->setParam("\\ADREG", 0); + cell->setParam("\\A_INPUT", Const("DIRECT")); + cell->setParam("\\ALUMODEREG", 0); + cell->setParam("\\AREG", 0); + cell->setParam("\\BCASCREG", 0); + cell->setParam("\\B_INPUT", Const("DIRECT")); + cell->setParam("\\BREG", 0); + cell->setParam("\\CARRYINREG", 0); + cell->setParam("\\CARRYINSELREG", 0); + cell->setParam("\\CREG", 0); + cell->setParam("\\DREG", 0); + cell->setParam("\\INMODEREG", 0); + cell->setParam("\\MREG", 0); + cell->setParam("\\OPMODEREG", 0); + cell->setParam("\\PREG", 0); + cell->setParam("\\USE_MULT", Const("NONE")); + cell->setParam("\\USE_SIMD", Const("ONE48")); + cell->setParam("\\USE_DPORT", Const("FALSE")); + + cell->setPort("\\D", Const(0, 24)); + cell->setPort("\\INMODE", Const(0, 5)); + cell->setPort("\\ALUMODE", Const(0, 4)); + cell->setPort("\\OPMODE", Const(0, 7)); + cell->setPort("\\CARRYINSEL", Const(0, 3)); + cell->setPort("\\ACIN", Const(0, 30)); + cell->setPort("\\BCIN", Const(0, 18)); + cell->setPort("\\PCIN", Const(0, 48)); + cell->setPort("\\CARRYIN", Const(0, 1)); + return cell; +} + +void pack_xilinx_simd(Module *module, const std::vector<Cell*> &selected_cells) +{ + std::deque<Cell*> simd12_add, simd12_sub; + std::deque<Cell*> simd24_add, simd24_sub; + + for (auto cell : selected_cells) { + if (!cell->type.in("$add", "$sub")) + continue; + SigSpec Y = cell->getPort("\\Y"); + if (!Y.is_chunk()) + continue; + if (!Y.as_chunk().wire->get_strpool_attribute("\\use_dsp").count("simd")) + continue; + if (GetSize(Y) > 25) + continue; + SigSpec A = cell->getPort("\\A"); + SigSpec B = cell->getPort("\\B"); + if (GetSize(Y) <= 13) { + if (GetSize(A) > 12) + continue; + if (GetSize(B) > 12) + continue; + if (cell->type == "$add") + simd12_add.push_back(cell); + else if (cell->type == "$sub") + simd12_sub.push_back(cell); + } + else if (GetSize(Y) <= 25) { + if (GetSize(A) > 24) + continue; + if (GetSize(B) > 24) + continue; + if (cell->type == "$add") + simd24_add.push_back(cell); + else if (cell->type == "$sub") + simd24_sub.push_back(cell); + } + else + log_abort(); + } + + auto f12 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort("\\A"); + SigSpec B = lane->getPort("\\B"); + SigSpec Y = lane->getPort("\\Y"); + A.extend_u0(12, lane->getParam("\\A_SIGNED").as_bool()); + B.extend_u0(12, lane->getParam("\\B_SIGNED").as_bool()); + AB.append(A); + C.append(B); + if (GetSize(Y) < 13) + Y.append(module->addWire(NEW_ID, 13-GetSize(Y))); + else + log_assert(GetSize(Y) == 13); + P.append(Y.extract(0, 12)); + CARRYOUT.append(Y[12]); + }; + auto g12 = [&f12,module](std::deque<Cell*> &simd12) { + while (simd12.size() > 1) { + SigSpec AB, C, P, CARRYOUT; + + Cell *lane1 = simd12.front(); + simd12.pop_front(); + Cell *lane2 = simd12.front(); + simd12.pop_front(); + Cell *lane3 = nullptr; + Cell *lane4 = nullptr; + + if (!simd12.empty()) { + lane3 = simd12.front(); + simd12.pop_front(); + if (!simd12.empty()) { + lane4 = simd12.front(); + simd12.pop_front(); + } + } + + log("Analysing %s.%s for Xilinx DSP SIMD12 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam("\\USE_SIMD", Const("FOUR12")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort("\\OPMODE", Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f12(AB, C, P, CARRYOUT, lane1); + f12(AB, C, P, CARRYOUT, lane2); + if (lane3) { + f12(AB, C, P, CARRYOUT, lane3); + if (lane4) + f12(AB, C, P, CARRYOUT, lane4); + else { + AB.append(Const(0, 12)); + C.append(Const(0, 12)); + P.append(module->addWire(NEW_ID, 12)); + CARRYOUT.append(module->addWire(NEW_ID, 1)); + } + } + else { + AB.append(Const(0, 24)); + C.append(Const(0, 24)); + P.append(module->addWire(NEW_ID, 24)); + CARRYOUT.append(module->addWire(NEW_ID, 2)); + } + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort("\\A", AB.extract(18, 30)); + cell->setPort("\\B", AB.extract(0, 18)); + cell->setPort("\\C", C); + cell->setPort("\\P", P); + cell->setPort("\\CARRYOUT", CARRYOUT); + if (lane1->type == "$sub") + cell->setPort("\\ALUMODE", Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + if (lane3) module->remove(lane3); + if (lane4) module->remove(lane4); + + module->design->select(module, cell); + } + }; + g12(simd12_add); + g12(simd12_sub); + + auto f24 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort("\\A"); + SigSpec B = lane->getPort("\\B"); + SigSpec Y = lane->getPort("\\Y"); + A.extend_u0(24, lane->getParam("\\A_SIGNED").as_bool()); + B.extend_u0(24, lane->getParam("\\B_SIGNED").as_bool()); + C.append(A); + AB.append(B); + if (GetSize(Y) < 25) + Y.append(module->addWire(NEW_ID, 25-GetSize(Y))); + else + log_assert(GetSize(Y) == 25); + P.append(Y.extract(0, 24)); + CARRYOUT.append(module->addWire(NEW_ID)); // TWO24 uses every other bit + CARRYOUT.append(Y[24]); + }; + auto g24 = [&f24,module](std::deque<Cell*> &simd24) { + while (simd24.size() > 1) { + SigSpec AB; + SigSpec C; + SigSpec P; + SigSpec CARRYOUT; + + Cell *lane1 = simd24.front(); + simd24.pop_front(); + Cell *lane2 = simd24.front(); + simd24.pop_front(); + + log("Analysing %s.%s for Xilinx DSP SIMD24 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam("\\USE_SIMD", Const("TWO24")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort("\\OPMODE", Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f24(AB, C, P, CARRYOUT, lane1); + f24(AB, C, P, CARRYOUT, lane2); + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort("\\A", AB.extract(18, 30)); + cell->setPort("\\B", AB.extract(0, 18)); + cell->setPort("\\C", C); + cell->setPort("\\P", P); + cell->setPort("\\CARRYOUT", CARRYOUT); + if (lane1->type == "$sub") + cell->setPort("\\ALUMODE", Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + + module->design->select(module, cell); + } + }; + g24(simd24_add); + g24(simd24_sub); +} + + +void pack_xilinx_dsp(dict<SigBit, Cell*> &bit_to_driver, xilinx_dsp_pm &pm) +{ + auto &st = pm.st_xilinx_dsp; + +#if 1 + log("\n"); + log("preAdd: %s\n", log_id(st.preAdd, "--")); + log("ffAD: %s %s %s\n", log_id(st.ffAD, "--"), log_id(st.ffADcemux, "--"), log_id(st.ffADrstmux, "--")); + log("ffA2: %s %s %s\n", log_id(st.ffA2, "--"), log_id(st.ffA2cemux, "--"), log_id(st.ffA2rstmux, "--")); + log("ffA1: %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--")); + log("ffB2: %s %s %s\n", log_id(st.ffB2, "--"), log_id(st.ffB2cemux, "--"), log_id(st.ffB2rstmux, "--")); + log("ffB1: %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--")); + log("ffC: %s %s %s\n", log_id(st.ffC, "--"), log_id(st.ffCcemux, "--"), log_id(st.ffCrstmux, "--")); + log("ffD: %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--")); + log("dsp: %s\n", log_id(st.dsp, "--")); + log("ffM: %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--")); + log("postAdd: %s\n", log_id(st.postAdd, "--")); + log("postAddMux: %s\n", log_id(st.postAddMux, "--")); + log("ffP: %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--")); + log("overflow: %s\n", log_id(st.overflow, "--")); +#endif + + log("Analysing %s.%s for Xilinx DSP packing.\n", log_id(pm.module), log_id(st.dsp)); + + Cell *cell = st.dsp; + + if (st.preAdd) { + log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type)); + bool A_SIGNED = st.preAdd->getParam("\\A_SIGNED").as_bool(); + bool D_SIGNED = st.preAdd->getParam("\\B_SIGNED").as_bool(); + if (st.sigA == st.preAdd->getPort("\\B")) + std::swap(A_SIGNED, D_SIGNED); + st.sigA.extend_u0(30, A_SIGNED); + st.sigD.extend_u0(25, D_SIGNED); + cell->setPort("\\A", st.sigA); + cell->setPort("\\D", st.sigD); + cell->connections_.at("\\INMODE") = Const::from_string("00100"); + + if (st.ffAD) { + if (st.ffADcemux) { + SigSpec S = st.ffADcemux->getPort("\\S"); + cell->setPort("\\CEAD", st.ffADcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CEAD", State::S1); + cell->setParam("\\ADREG", 1); + } + + cell->setParam("\\USE_DPORT", Const("TRUE")); + + pm.autoremove(st.preAdd); + } + if (st.postAdd) { + log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type)); + + SigSpec &opmode = cell->connections_.at("\\OPMODE"); + if (st.postAddMux) { + log_assert(st.ffP); + opmode[4] = st.postAddMux->getPort("\\S"); + pm.autoremove(st.postAddMux); + } + else if (st.ffP && st.sigC == st.sigP) + opmode[4] = State::S0; + else + opmode[4] = State::S1; + opmode[6] = State::S0; + opmode[5] = State::S1; + + if (opmode[4] != State::S0) { + //if (st.postAddMuxAB == "\\A") + // st.sigC.extend_u0(48, st.postAdd->getParam("\\B_SIGNED").as_bool()); + //else + // st.sigC.extend_u0(48, st.postAdd->getParam("\\A_SIGNED").as_bool()); + cell->setPort("\\C", st.sigC); + } + + pm.autoremove(st.postAdd); + } + if (st.overflow) { + log(" overflow %s (%s)\n", log_id(st.overflow), log_id(st.overflow->type)); + cell->setParam("\\USE_PATTERN_DETECT", Const("PATDET")); + cell->setParam("\\SEL_PATTERN", Const("PATTERN")); + cell->setParam("\\SEL_MASK", Const("MASK")); + + if (st.overflow->type == "$ge") { + Const B = st.overflow->getPort("\\B").as_const(); + log_assert(std::count(B.bits.begin(), B.bits.end(), State::S1) == 1); + // Since B is an exact power of 2, subtract 1 + // by inverting all bits up until hitting + // that one hi bit + for (auto &b : B.bits) + if (b == State::S0) b = State::S1; + else if (b == State::S1) { + b = State::S0; + break; + } + B.extu(48); + + cell->setParam("\\MASK", B); + cell->setParam("\\PATTERN", Const(0, 48)); + cell->setPort("\\OVERFLOW", st.overflow->getPort("\\Y")); + } + else log_abort(); + + pm.autoremove(st.overflow); + } + + if (st.clock != SigBit()) + { + cell->setPort("\\CLK", st.clock); + + auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { + SigSpec D = ff->getPort("\\D"); + SigSpec Q = pm.sigmap(ff->getPort("\\Q")); + if (!A.empty()) + A.replace(Q, D); + if (rstmux) { + SigSpec Y = rstmux->getPort("\\Y"); + SigSpec AB = rstmux->getPort(rstpol ? "\\A" : "\\B"); + if (!A.empty()) + A.replace(Y, AB); + if (rstport != IdString()) { + SigSpec S = rstmux->getPort("\\S"); + cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); + } + } + else if (rstport != IdString()) + cell->setPort(rstport, State::S0); + if (cemux) { + SigSpec Y = cemux->getPort("\\Y"); + SigSpec BA = cemux->getPort(cepol ? "\\B" : "\\A"); + SigSpec S = cemux->getPort("\\S"); + if (!A.empty()) + A.replace(Y, BA); + cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort(ceport, State::S1); + + for (auto c : Q.chunks()) { + auto it = c.wire->attributes.find("\\init"); + if (it == c.wire->attributes.end()) + continue; + for (int i = c.offset; i < c.offset+c.width; i++) { + log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); + it->second[i] = State::Sx; + } + } + }; + + if (st.ffA2) { + SigSpec &A = cell->connections_.at("\\A"); + f(A, st.ffA2, st.ffA2cemux, st.ffA2cepol, "\\CEA2", st.ffA2rstmux, st.ffArstpol, "\\RSTA"); + pm.add_siguser(A, cell); + if (st.ffA1) { + f(A, st.ffA1, st.ffA1cemux, st.ffA1cepol, "\\CEA1", st.ffA1rstmux, st.ffArstpol, IdString()); + cell->setParam("\\AREG", 2); + } + else + cell->setParam("\\AREG", 1); + } + if (st.ffB2) { + SigSpec &B = cell->connections_.at("\\B"); + f(B, st.ffB2, st.ffB2cemux, st.ffB2cepol, "\\CEB2", st.ffB2rstmux, st.ffBrstpol, "\\RSTB"); + pm.add_siguser(B, cell); + if (st.ffB1) { + f(B, st.ffB1, st.ffB1cemux, st.ffB1cepol, "\\CEB1", st.ffB1rstmux, st.ffBrstpol, IdString()); + cell->setParam("\\BREG", 2); + } + else + cell->setParam("\\BREG", 1); + } + if (st.ffC) { + SigSpec &C = cell->connections_.at("\\C"); + f(C, st.ffC, st.ffCcemux, st.ffCcepol, "\\CEC", st.ffCrstmux, st.ffCrstpol, "\\RSTC"); + pm.add_siguser(C, cell); + cell->setParam("\\CREG", 1); + } + if (st.ffD) { + SigSpec &D = cell->connections_.at("\\D"); + f(D, st.ffD, st.ffDcemux, st.ffDcepol, "\\CED", st.ffDrstmux, st.ffDrstpol, "\\RSTD"); + pm.add_siguser(D, cell); + cell->setParam("\\DREG", 1); + } + if (st.ffM) { + SigSpec M; // unused + f(M, st.ffM, st.ffMcemux, st.ffMcepol, "\\CEM", st.ffMrstmux, st.ffMrstpol, "\\RSTM"); + st.ffM->connections_.at("\\Q").replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM))); + cell->setParam("\\MREG", State::S1); + } + if (st.ffP) { + SigSpec P; // unused + f(P, st.ffP, st.ffPcemux, st.ffPcepol, "\\CEP", st.ffPrstmux, st.ffPrstpol, "\\RSTP"); + st.ffP->connections_.at("\\Q").replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP))); + cell->setParam("\\PREG", State::S1); + } + + log(" clock: %s (%s)", log_signal(st.clock), "posedge"); + + if (st.ffA2) { + log(" ffA2:%s", log_id(st.ffA2)); + if (st.ffA1) + log(" ffA1:%s", log_id(st.ffA1)); + } + + if (st.ffAD) + log(" ffAD:%s", log_id(st.ffAD)); + + if (st.ffB2) { + log(" ffB2:%s", log_id(st.ffB2)); + if (st.ffB1) + log(" ffB1:%s", log_id(st.ffB1)); + } + + if (st.ffC) + log(" ffC:%s", log_id(st.ffC)); + + if (st.ffD) + log(" ffD:%s", log_id(st.ffD)); + + if (st.ffM) + log(" ffM:%s", log_id(st.ffM)); + + if (st.ffP) + log(" ffP:%s", log_id(st.ffP)); + + log("\n"); + } + + SigSpec P = st.sigP; + if (GetSize(P) < 48) + P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); + cell->setPort("\\P", P); + + bit_to_driver.insert(std::make_pair(P[0], cell)); + bit_to_driver.insert(std::make_pair(P[17], cell)); + + pm.blacklist(cell); +} + +struct XilinxDspPass : public Pass { + XilinxDspPass() : Pass("xilinx_dsp", "Xilinx: pack resources into DSPs") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" xilinx_dsp [options] [selection]\n"); + log("\n"); + log("Pack input registers (A2, A1, B2, B1, C, D, AD; with optional enable/reset),\n"); + log("pipeline registers (M; with optional enable/reset), output registers (P; with\n"); + log("optional enable/reset), pre-adder and/or post-adder into Xilinx DSP resources.\n"); + log("\n"); + log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n"); + log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n"); + log("used to override the existing accumulation result with a new value.\n"); + log("\n"); + log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n"); + log("connections (optionally, where 'P' is right-shifted by 18-bits and used as an\n"); + log("input to the post-adder -- a pattern common for summing partial products to\n"); + log("implement wide multipliers).\n"); + log("\n"); + log("\n"); + log("Experimental feature: addition/subtractions less than 12 or 24 bits with the\n"); + log("'(* use_dsp=\"simd\" *)' attribute attached to the output wire or attached to\n"); + log("the add/subtract operator will cause those operations to be implemented using\n"); + log("the 'SIMD' feature of DSPs.\n"); + log("\n"); + log("Experimental feature: the presence of a `$ge' cell attached to the registered\n"); + log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n"); + log("into using the DSP48E1's pattern detector feature for overflow detection.\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + // if (args[argidx] == "-singleton") { + // singleton_mode = true; + // continue; + // } + break; + } + extra_args(args, argidx, design); + + for (auto module : design->selected_modules()) { + pack_xilinx_simd(module, module->selected_cells()); + + xilinx_dsp_pm pm(module, module->selected_cells()); + dict<SigBit, Cell*> bit_to_driver; + auto f = [&bit_to_driver](xilinx_dsp_pm &pm){ pack_xilinx_dsp(bit_to_driver, pm); }; + pm.run_xilinx_dsp(f); + + auto &unextend = pm.ud_xilinx_dsp.unextend; + // Look for ability to convert C input from another DSP into PCIN + // NB: Needs to be done after pattern matcher has folded all + // $add cells into the DSP + for (auto cell : module->cells()) { + if (cell->type != "\\DSP48E1") + continue; + if (cell->parameters.at("\\CREG", State::S1).as_bool()) + continue; + SigSpec &opmode = cell->connections_.at("\\OPMODE"); + if (opmode.extract(4,3) != Const::from_string("011")) + continue; + SigSpec C = unextend(pm.sigmap(cell->getPort("\\C"))); + if (!C[0].wire) + continue; + auto it = bit_to_driver.find(C[0]); + if (it == bit_to_driver.end()) + continue; + auto driver = it->second; + + SigSpec P = driver->getPort("\\P"); + if (GetSize(P) >= GetSize(C) && P.extract(0, GetSize(C)) == C) { + cell->setPort("\\C", Const(0, 48)); + Wire *cascade = module->addWire(NEW_ID, 48); + driver->setPort("\\PCOUT", cascade); + cell->setPort("\\PCIN", cascade); + opmode[6] = State::S0; + opmode[5] = State::S0; + opmode[4] = State::S1; + bit_to_driver.erase(it); + } + else if (GetSize(P) >= GetSize(C)+17 && P.extract(17, GetSize(C)) == C) { + cell->setPort("\\C", Const(0, 48)); + Wire *cascade = module->addWire(NEW_ID, 48); + driver->setPort("\\PCOUT", cascade); + cell->setPort("\\PCIN", cascade); + opmode[6] = State::S1; + opmode[5] = State::S0; + opmode[4] = State::S1; + bit_to_driver.erase(it); + } + } + } + } +} XilinxDspPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg new file mode 100644 index 000000000..08cb1f51b --- /dev/null +++ b/passes/pmgen/xilinx_dsp.pmg @@ -0,0 +1,621 @@ +pattern xilinx_dsp + +udata <std::function<SigSpec(const SigSpec&)>> unextend +state <SigBit> clock +state <SigSpec> sigA sigffAcemuxY sigB sigffBcemuxY sigC sigffCcemuxY sigD sigffDcemuxY sigM sigP +state <IdString> postAddAB postAddMuxAB +state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffCcepol ffDcepol ffMcepol ffPcepol +state <bool> ffArstpol ffADrstpol ffBrstpol ffCrstpol ffDrstpol ffMrstpol ffPrstpol + +state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux +state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux ffC ffCcemux ffCrstmux +state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux + +// subpattern +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +match dsp + select dsp->type.in(\DSP48E1) +endmatch + +code sigA sigB sigC sigD sigM + unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigA = unextend(port(dsp, \A)); + sigB = unextend(port(dsp, \B)); + + sigC = dsp->connections_.at(\C, SigSpec()); + sigD = dsp->connections_.at(\D, SigSpec()); + + SigSpec P = port(dsp, \P); + if (dsp->parameters.at(\USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { + // Only care about those bits that are used + int i; + for (i = 0; i < GetSize(P); i++) { + if (nusers(P[i]) <= 1) + break; + sigM.append(P[i]); + } + log_assert(nusers(P.extract_end(i)) <= 1); + } + else + sigM = P; +endcode + +code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock + if (param(dsp, \ADREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffAD = dff; + clock = dffclock; + if (dffrstmux) { + ffADrstmux = dffrstmux; + ffADrstpol = dffrstpol; + } + if (dffcemux) { + ffADcemux = dffcemux; + ffADcepol = dffcepol; + } + sigA = dffD; + } + } +endcode + +match preAdd + if sigD.empty() || sigD.is_fully_zero() + // Ensure that preAdder not already used + if dsp->parameters.at(\USE_DPORT, Const("FALSE")).decode_string() == "FALSE" + if dsp->connections_.at(\INMODE, Const(0, 5)).is_fully_zero() + + select preAdd->type.in($add) + // Output has to be 25 bits or less + select GetSize(port(preAdd, \Y)) <= 25 + select nusers(port(preAdd, \Y)) == 2 + choice <IdString> AB {\A, \B} + // A port has to be 30 bits or less + select GetSize(port(preAdd, AB)) <= 30 + define <IdString> BA (AB == \A ? \B : \A) + // D port has to be 25 bits or less + select GetSize(port(preAdd, BA)) <= 25 + index <SigSpec> port(preAdd, \Y) === sigA + + optional +endmatch + +code sigA sigD + if (preAdd) { + sigA = port(preAdd, \A); + sigD = port(preAdd, \B); + if (GetSize(sigA) < GetSize(sigD)) + std::swap(sigA, sigD); + } +endcode + +code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol + // Only search for ffA2 if there was a pre-adder + // (otherwise ffA2 would have been matched as ffAD) + if (preAdd) { + if (param(dsp, \AREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffA2 = dff; + clock = dffclock; + if (dffrstmux) { + ffA2rstmux = dffrstmux; + ffArstpol = dffrstpol; + } + if (dffcemux) { + ffA2cepol = dffcepol; + ffA2cemux = dffcemux; + } + sigA = dffD; + } + } + } + // And if there wasn't a pre-adder, + // move AD register to A + else if (ffAD) { + log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux); + std::swap(ffA2, ffAD); + std::swap(ffA2cemux, ffADcemux); + std::swap(ffA2rstmux, ffADrstmux); + ffA2cepol = ffADcepol; + ffArstpol = ffADrstpol; + } + + // Now attempt to match A1 + if (ffA2) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffA1_end; + if (dffrstmux) { + if (ffArstpol != dffrstpol) + goto ffA1_end; + if (port(ffA2rstmux, \S) != port(dffrstmux, \S)) + goto ffA1_end; + ffA1rstmux = dffrstmux; + } + + ffA1 = dff; + clock = dffclock; + + if (dffcemux) { + ffA1cemux = dffcemux; + ffA1cepol = dffcepol; + } + sigA = dffD; + +ffA1_end: ; + } + } +endcode + +code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol + if (param(dsp, \BREG).as_int() == 0) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + ffB2 = dff; + clock = dffclock; + if (dffrstmux) { + ffB2rstmux = dffrstmux; + ffBrstpol = dffrstpol; + } + if (dffcemux) { + ffB2cemux = dffcemux; + ffB2cepol = dffcepol; + } + sigB = dffD; + + // Now attempt to match B1 + if (ffB2) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffB1_end; + if (dffrstmux) { + if (ffBrstpol != dffrstpol) + goto ffB1_end; + if (port(ffB2rstmux, \S) != port(dffrstmux, \S)) + goto ffB1_end; + ffB1rstmux = dffrstmux; + } + + ffB1 = dff; + clock = dffclock; + + if (dffcemux) { + ffB1cemux = dffcemux; + ffB1cepol = dffcepol; + } + sigB = dffD; + +ffB1_end: ; + } + } + + } + } +endcode + +code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock + if (param(dsp, \DREG).as_int() == 0) { + argQ = sigD; + subpattern(in_dffe); + if (dff) { + ffD = dff; + clock = dffclock; + if (dffrstmux) { + ffDrstmux = dffrstmux; + ffDrstpol = dffrstpol; + } + if (dffcemux) { + ffDcemux = dffcemux; + ffDcepol = dffcepol; + } + sigD = dffD; + } + } +endcode + +code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock + if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { + argD = sigM; + subpattern(out_dffe); + if (dff) { + ffM = dff; + clock = dffclock; + if (dffrstmux) { + ffMrstmux = dffrstmux; + ffMrstpol = dffrstpol; + } + if (dffcemux) { + ffMcemux = dffcemux; + ffMcepol = dffcepol; + } + sigM = dffQ; + } + } + sigP = sigM; +endcode + +match postAdd + // Ensure that Z mux is not already used + if port(dsp, \OPMODE).extract(4,3).is_fully_zero() + + select postAdd->type.in($add) + select GetSize(port(postAdd, \Y)) <= 48 + choice <IdString> AB {\A, \B} + select nusers(port(postAdd, AB)) <= 3 + filter ffMcemux || nusers(port(postAdd, AB)) == 2 + filter !ffMcemux || nusers(port(postAdd, AB)) == 3 + filter GetSize(unextend(port(postAdd, AB))) <= GetSize(sigP) + filter unextend(port(postAdd, AB)) == sigP.extract(0, GetSize(unextend(port(postAdd, AB)))) + filter nusers(sigP.extract_end(GetSize(unextend(port(postAdd, AB))))) <= 1 + set postAddAB AB + optional +endmatch + +code sigC sigP + if (postAdd) { + sigC = port(postAdd, postAddAB == \A ? \B : \A); + + // TODO for DSP48E1, which will have sign extended inputs/outputs + //int natural_mul_width = GetSize(port(dsp, \A)) + GetSize(port(dsp, \B)); + //int actual_mul_width = GetSize(sigP); + //int actual_acc_width = GetSize(sigC); + + //if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) + // reject; + //if ((actual_acc_width != actual_mul_width) && (param(dsp, \A_SIGNED).as_bool() != param(postAdd, \A_SIGNED).as_bool())) + // reject; + + sigP = port(postAdd, \Y); + } +endcode + +code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock + if (param(dsp, \PREG).as_int() == 0) { + int users = 2; + // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux + if (ffMcemux && !postAdd) users++; + if (nusers(sigP) == users) { + argD = sigP; + subpattern(out_dffe); + if (dff) { + ffP = dff; + clock = dffclock; + if (dffrstmux) { + ffPrstmux = dffrstmux; + ffPrstpol = dffrstpol; + } + if (dffcemux) { + ffPcemux = dffcemux; + ffPcepol = dffcepol; + } + sigP = dffQ; + } + } + } +endcode + +match postAddMux + if postAdd + if ffP + select postAddMux->type.in($mux) + select nusers(port(postAddMux, \Y)) == 2 + choice <IdString> AB {\A, \B} + index <SigSpec> port(postAddMux, AB) === sigP + index <SigSpec> port(postAddMux, \Y) === sigC + set postAddMuxAB AB + optional +endmatch + +code sigC + if (postAddMux) + sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); +endcode + +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + if (param(dsp, \CREG).as_int() == 0 && sigC != sigP) { + argQ = sigC; + subpattern(in_dffe); + if (dff) { + ffC = dff; + clock = dffclock; + if (dffrstmux) { + ffCrstmux = dffrstmux; + ffCrstpol = dffrstpol; + } + if (dffcemux) { + ffCcemux = dffcemux; + ffCcepol = dffcepol; + } + sigC = dffD; + } + } +endcode + +match overflow + if ffP + if dsp->parameters.at(\USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" + select overflow->type.in($ge) + select GetSize(port(overflow, \Y)) <= 48 + select port(overflow, \B).is_fully_const() + define <Const> B port(overflow, \B).as_const() + select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1 + index <SigSpec> port(overflow, \A) === sigP + optional +endmatch + +code + accept; +endcode + +// ####################### + +subpattern in_dffe +arg argD argQ clock + +code + dff = nullptr; + for (auto c : argQ.chunks()) { + if (!c.wire) + reject; + if (c.wire->get_bool_attribute(\keep)) + reject; + } +endcode + +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \Q)[offset] === argQ[0] + set ffoffset offset +endmatch + +code argQ argD +{ + if (clock != SigBit()) { + if (port(ff, \CLK) != clock) + reject; + } + + SigSpec Q = port(ff, \Q); + if (ffoffset + GetSize(argQ) > GetSize(Q)) + reject; + for (int i = 1; i < GetSize(argQ); i++) + if (Q[ffoffset+i] != argQ[i]) + reject; + + dff = ff; + dffclock = port(ff, \CLK); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); +} +endcode + +match ffrstmux + if !argD.empty() + select ffrstmux->type.in($mux) + index <SigSpec> port(ffrstmux, \Y) === argD + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <bool> pol (BA == \B) + set ffrstpol pol + semioptional +endmatch + +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffcemux if argQ has at + // least 3 users (ff, <upstream>, ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; +endcode + +match ffcemux + if !argD.empty() + select ffcemux->type.in($mux) + index <SigSpec> port(ffcemux, \Y) === argD + choice <IdString> AB {\A, \B} + index <SigSpec> port(ffcemux, AB) === argQ + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, ffcepol ? \B : \A); + dffD.replace(port(ffcemux, \Y), argD); + } + else + dffcemux = nullptr; +endcode + +// ####################### + +subpattern out_dffe +arg argD argQ clock + +code + dff = nullptr; +endcode + +match ffcemux + select ffcemux->type.in($mux) + // ffcemux output must have two users: ffcemux and ff.D + select nusers(port(ffcemux, \Y)) == 2 + + choice <IdString> AB {\A, \B} + // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) + select nusers(port(ffcemux, AB)) >= 3 + + slice offset GetSize(port(ffcemux, \Y)) + define <IdString> BA (AB == \A ? \B : \A) + index <SigBit> port(ffcemux, BA)[offset] === argD[0] + set ffoffset offset + define <bool> pol (BA == \B) + set ffcepol pol + + semioptional +endmatch + +code argD argQ + dffcemux = ffcemux; + if (ffcemux) { + SigSpec BA = port(ffcemux, ffcepol ? \B : \A); + if (ffoffset + GetSize(argD) > GetSize(BA)) + reject; + for (int i = 1; i < GetSize(argD); i++) + if (BA[ffoffset+i] != argD[i]) + reject; + + SigSpec Y = port(ffcemux, \Y); + argQ = argD; + argD.replace(BA, Y); + argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B)); + + dffcemux = ffcemux; + dffcepol = ffcepol; + } +endcode + +match ffrstmux + select ffrstmux->type.in($mux) + // ffrstmux output must have two users: ffrstmux and ff.D + select nusers(port(ffrstmux, \Y)) == 2 + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + slice offset GetSize(port(ffrstmux, \Y)) + define <IdString> AB (BA == \B ? \A : \B) + index <SigBit> port(ffrstmux, AB)[offset] === argD[0] + + filter !ffcemux || ffoffset == offset + set ffoffset offset + define <bool> pol (AB == \A) + set ffrstpol pol + + semioptional +endmatch + +code argD argQ + dffrstmux = ffrstmux; + if (ffrstmux) { + SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B); + if (ffoffset + GetSize(argD) > GetSize(AB)) + reject; + + for (int i = 1; i < GetSize(argD); i++) + if (AB[ffoffset+i] != argD[i]) + reject; + + SigSpec Y = port(ffrstmux, \Y); + argD.replace(AB, Y); + + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + } +endcode + +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \D)[offset] === argD[0] + + filter (!ffcemux && !ffrstmux) || ffoffset == offset + set ffoffset offset + + semioptional +endmatch + +code argQ + if (ff) { + if (clock != SigBit()) { + if (port(ff, \CLK) != clock) + reject; + } + + SigSpec D = port(ff, \D); + if (ffoffset + GetSize(argD) > GetSize(D)) + reject; + for (int i = 1; i < GetSize(argD); i++) + if (D[ffoffset+i] != argD[i]) + reject; + + SigSpec Q = port(ff, \Q); + if (ffcemux) { + for (int i = 0; i < GetSize(argQ); i++) + if (Q[ffoffset+i] != argQ[i]) + reject; + } + else { + argQ = argD; + argQ.replace(D, Q); + } + + for (auto c : argQ.chunks()) { + if (c.wire->get_bool_attribute(\keep)) + reject; + Const init = c.wire->attributes.at(\init, State::Sx); + if (!init.is_fully_undef() && !init.is_fully_zero()) + reject; + } + + dff = ff; + dffQ = argQ; + dffclock = port(dff, \CLK); + } + // No enable/reset mux possible without flop + else if (dffcemux || dffrstmux) + reject; +endcode diff --git a/passes/tests/test_autotb.cc b/passes/tests/test_autotb.cc index bfb1d6642..2b6a86c25 100644 --- a/passes/tests/test_autotb.cc +++ b/passes/tests/test_autotb.cc @@ -345,9 +345,17 @@ struct TestAutotbBackend : public Backend { log("value after initialization. This can e.g. be used to force a reset signal\n"); log("low in order to explore more inner states in a state machine.\n"); log("\n"); + log("The attribute 'gentb_skip' can be attached to modules to suppress testbench\n"); + log("generation.\n"); + log("\n"); log(" -n <int>\n"); log(" number of iterations the test bench should run (default = 1000)\n"); log("\n"); + log(" -seed <int>\n"); + log(" seed used for pseudo-random number generation (default = 0).\n"); + log(" a value of 0 will cause an arbitrary seed to be chosen, based on\n"); + log(" the current system time.\n"); + log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { |