diff options
34 files changed, 3094 insertions, 215 deletions
@@ -39,6 +39,11 @@ Yosys 0.9 .. Yosys 0.9-dev - Added "xilinx_srl" for Xilinx shift register extraction - Removed "shregmap -tech xilinx" (superseded by "xilinx_srl") - Added "_TECHMAP_WIREINIT_*_" attribute and "_TECHMAP_REMOVEINIT_*_" wire for "techmap" pass + - Added +/mul2dsp.v for decomposing wide multipliers to custom-sized ones + - Added "xilinx_dsp" for Xilinx DSP packing + - "synth_xilinx" to now infer DSP blocks (-nodsp to disable) + - "synth_ecp5" to now infer DSP blocks (-nodsp to disable, experimental) + - "synth_ice40 -dsp" to infer DSP blocks Yosys 0.8 .. Yosys 0.9 ---------------------- diff --git a/frontends/aiger/aigerparse.cc b/frontends/aiger/aigerparse.cc index a8d5abc1e..e8ee487e5 100644 --- a/frontends/aiger/aigerparse.cc +++ b/frontends/aiger/aigerparse.cc @@ -985,7 +985,7 @@ void AigerReader::post_process() // operate (and run checks on) this one module RTLIL::Design *mapped_design = new RTLIL::Design; mapped_design->add(module); - Pass::call(mapped_design, "clean -purge"); + Pass::call(mapped_design, "clean"); mapped_design->modules_.erase(module->name); delete mapped_design; diff --git a/passes/pmgen/.gitignore b/passes/pmgen/.gitignore index 6b319b8c3..e52f3282f 100644 --- a/passes/pmgen/.gitignore +++ b/passes/pmgen/.gitignore @@ -1 +1 @@ -/*_pm.h
\ No newline at end of file +/*_pm.h diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index 4989c582a..fcbaa99ed 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -21,12 +21,19 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h)) # -------------------------------------- +OBJS += passes/pmgen/xilinx_dsp.o +passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h)) + +# -------------------------------------- + OBJS += passes/pmgen/peepopt.o passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h)) PEEPOPT_PATTERN = passes/pmgen/peepopt_shiftmul.pmg PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg +PEEPOPT_PATTERN += passes/pmgen/peepopt_dffmuxext.pmg passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN) $(P) mkdir -p passes/pmgen && python3 $< -o $@ -p peepopt $(filter-out $<,$^) diff --git a/passes/pmgen/ice40_dsp.cc b/passes/pmgen/ice40_dsp.cc index 16bfe537f..68fc29f31 100644 --- a/passes/pmgen/ice40_dsp.cc +++ b/passes/pmgen/ice40_dsp.cc @@ -29,15 +29,15 @@ void create_ice40_dsp(ice40_dsp_pm &pm) { auto &st = pm.st_ice40_dsp; -#if 0 +#if 1 log("\n"); - log("ffA: %s\n", log_id(st.ffA, "--")); - log("ffB: %s\n", log_id(st.ffB, "--")); - log("mul: %s\n", log_id(st.mul, "--")); - log("ffY: %s\n", log_id(st.ffY, "--")); - log("addAB: %s\n", log_id(st.addAB, "--")); - log("muxAB: %s\n", log_id(st.muxAB, "--")); - log("ffS: %s\n", log_id(st.ffS, "--")); + log("ffA: %s\n", log_id(st.ffA, "--")); + log("ffB: %s\n", log_id(st.ffB, "--")); + log("mul: %s\n", log_id(st.mul, "--")); + log("ffFJKG: %s\n", log_id(st.ffFJKG, "--")); + log("addAB: %s\n", log_id(st.addAB, "--")); + log("muxAB: %s\n", log_id(st.muxAB, "--")); + log("ffO: %s\n", log_id(st.ffO, "--")); #endif log("Checking %s.%s for iCE40 DSP inference.\n", log_id(pm.module), log_id(st.mul)); @@ -52,42 +52,44 @@ void create_ice40_dsp(ice40_dsp_pm &pm) return; } - if (GetSize(st.sigS) > 32) { - log(" accumulator (%s) is too large (%d > 32).\n", log_signal(st.sigS), GetSize(st.sigS)); + if (GetSize(st.sigO) > 33) { + log(" adder/accumulator (%s) is too large (%d > 33).\n", log_signal(st.sigO), GetSize(st.sigO)); return; } - if (GetSize(st.sigY) > 32) { - log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigY), GetSize(st.sigY)); + if (GetSize(st.sigH) > 32) { + log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigH), GetSize(st.sigH)); return; } - bool mul_signed = st.mul->getParam("\\A_SIGNED").as_bool(); + Cell *cell = st.mul; + if (cell->type == "$mul") { + log(" replacing %s with SB_MAC16 cell.\n", log_id(st.mul->type)); - log(" replacing $mul with SB_MAC16 cell.\n"); - - Cell *cell = pm.module->addCell(NEW_ID, "\\SB_MAC16"); - pm.module->swap_names(cell, st.mul); + cell = pm.module->addCell(NEW_ID, "\\SB_MAC16"); + pm.module->swap_names(cell, st.mul); + } + else log_assert(cell->type == "\\SB_MAC16"); // SB_MAC16 Input Interface - SigSpec A = st.sigA; - A.extend_u0(16, mul_signed); + A.extend_u0(16, st.mul->getParam("\\A_SIGNED").as_bool()); + log_assert(GetSize(A) == 16); SigSpec B = st.sigB; - B.extend_u0(16, mul_signed); + B.extend_u0(16, st.mul->getParam("\\B_SIGNED").as_bool()); + log_assert(GetSize(B) == 16); - SigSpec CD; - if (st.muxA) - CD = st.muxA->getPort("\\B"); - if (st.muxB) - CD = st.muxB->getPort("\\A"); - CD.extend_u0(32, mul_signed); + SigSpec CD = st.sigCD; + if (CD.empty()) + CD = RTLIL::Const(0, 32); + else + log_assert(GetSize(CD) == 32); cell->setPort("\\A", A); cell->setPort("\\B", B); - cell->setPort("\\C", CD.extract(0, 16)); - cell->setPort("\\D", CD.extract(16, 16)); + cell->setPort("\\C", CD.extract(16, 16)); + cell->setPort("\\D", CD.extract(0, 16)); cell->setParam("\\A_REG", st.ffA ? State::S1 : State::S0); cell->setParam("\\B_REG", st.ffB ? State::S1 : State::S0); @@ -100,7 +102,7 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setPort("\\IRSTTOP", State::S0); cell->setPort("\\IRSTBOT", State::S0); - if (st.clock_vld) + if (st.clock != SigBit()) { cell->setPort("\\CLK", st.clock); cell->setPort("\\CE", State::S1); @@ -114,11 +116,11 @@ void create_ice40_dsp(ice40_dsp_pm &pm) if (st.ffB) log(" ffB:%s", log_id(st.ffB)); - if (st.ffY) - log(" ffY:%s", log_id(st.ffY)); + if (st.ffFJKG) + log(" ffFJKG:%s", log_id(st.ffFJKG)); - if (st.ffS) - log(" ffS:%s", log_id(st.ffS)); + if (st.ffO) + log(" ffO:%s", log_id(st.ffO)); log("\n"); } @@ -135,21 +137,43 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setPort("\\SIGNEXTOUT", pm.module->addWire(NEW_ID)); cell->setPort("\\CI", State::Sx); - cell->setPort("\\CO", pm.module->addWire(NEW_ID)); cell->setPort("\\ACCUMCI", State::Sx); cell->setPort("\\ACCUMCO", pm.module->addWire(NEW_ID)); // SB_MAC16 Output Interface - SigSpec O = st.ffS ? st.sigS : st.sigY; + SigSpec O = st.sigO; + int O_width = GetSize(O); + if (O_width == 33) { + log_assert(st.addAB); + // If we have a signed multiply-add, then perform sign extension + // TODO: Need to check CD[31:16] is sign extension of CD[15:0]? + if (st.addAB->getParam("\\A_SIGNED").as_bool() && st.addAB->getParam("\\B_SIGNED").as_bool()) + pm.module->connect(O[32], O[31]); + else + cell->setPort("\\CO", O[32]); + O.remove(O_width-1); + } + else + cell->setPort("\\CO", pm.module->addWire(NEW_ID)); + log_assert(GetSize(O) <= 32); if (GetSize(O) < 32) O.append(pm.module->addWire(NEW_ID, 32-GetSize(O))); cell->setPort("\\O", O); + bool accum = false; if (st.addAB) { - log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); + if (st.addA) + accum = (st.ffO && st.addAB->getPort("\\B") == st.sigO); + else if (st.addB) + accum = (st.ffO && st.addAB->getPort("\\A") == st.sigO); + else log_abort(); + if (accum) + log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); + else + log(" adder %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); cell->setPort("\\ADDSUBTOP", st.addAB->type == "$add" ? State::S0 : State::S1); cell->setPort("\\ADDSUBBOT", st.addAB->type == "$add" ? State::S0 : State::S1); } else { @@ -177,28 +201,43 @@ void create_ice40_dsp(ice40_dsp_pm &pm) cell->setParam("\\C_REG", State::S0); cell->setParam("\\D_REG", State::S0); - cell->setParam("\\TOP_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\BOT_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffY ? State::S1 : State::S0); + cell->setParam("\\TOP_8x8_MULT_REG", st.ffFJKG ? State::S1 : State::S0); + cell->setParam("\\BOT_8x8_MULT_REG", st.ffFJKG ? State::S1 : State::S0); + cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffFJKG ? State::S1 : State::S0); cell->setParam("\\PIPELINE_16x16_MULT_REG2", State::S0); - cell->setParam("\\TOPOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); cell->setParam("\\TOPADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\TOPADDSUB_UPPERINPUT", State::S0); + cell->setParam("\\TOPADDSUB_UPPERINPUT", accum ? State::S0 : State::S1); cell->setParam("\\TOPADDSUB_CARRYSELECT", Const(3, 2)); - cell->setParam("\\BOTOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); cell->setParam("\\BOTADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\BOTADDSUB_UPPERINPUT", State::S0); + cell->setParam("\\BOTADDSUB_UPPERINPUT", accum ? State::S0 : State::S1); cell->setParam("\\BOTADDSUB_CARRYSELECT", Const(0, 2)); cell->setParam("\\MODE_8x8", State::S0); - cell->setParam("\\A_SIGNED", mul_signed ? State::S1 : State::S0); - cell->setParam("\\B_SIGNED", mul_signed ? State::S1 : State::S0); + cell->setParam("\\A_SIGNED", st.mul->getParam("\\A_SIGNED").as_bool()); + cell->setParam("\\B_SIGNED", st.mul->getParam("\\B_SIGNED").as_bool()); + + if (st.ffO) { + if (st.ffO_lo) + cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + else + cell->setParam("\\TOPOUTPUT_SELECT", Const(1, 2)); + + st.ffO->connections_.at("\\Q").replace(O, pm.module->addWire(NEW_ID, GetSize(O))); + cell->setParam("\\BOTOUTPUT_SELECT", Const(1, 2)); + } + else { + cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + cell->setParam("\\BOTOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2)); + } - pm.autoremove(st.mul); - pm.autoremove(st.ffY); - pm.autoremove(st.ffS); + if (cell != st.mul) + pm.autoremove(st.mul); + else + pm.blacklist(st.mul); + pm.autoremove(st.ffFJKG); + pm.autoremove(st.addAB); } struct Ice40DspPass : public Pass { diff --git a/passes/pmgen/ice40_dsp.pmg b/passes/pmgen/ice40_dsp.pmg index 7003092bb..fbf498109 100644 --- a/passes/pmgen/ice40_dsp.pmg +++ b/passes/pmgen/ice40_dsp.pmg @@ -1,87 +1,137 @@ pattern ice40_dsp state <SigBit> clock -state <bool> clock_pol clock_vld -state <SigSpec> sigA sigB sigY sigS +state <bool> clock_pol cd_signed +state <SigSpec> sigA sigB sigCD sigH sigO state <Cell*> addAB muxAB match mul - select mul->type.in($mul) + select mul->type.in($mul, \SB_MAC16) select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10 - select GetSize(mul->getPort(\Y)) > 10 endmatch +code sigA sigB sigH + SigSpec O; + if (mul->type == $mul) + O = mul->getPort(\Y); + else if (mul->type == \SB_MAC16) + O = mul->getPort(\O); + else log_abort(); + if (GetSize(O) <= 10) + reject; + + sigA = port(mul, \A); + int i; + for (i = GetSize(sigA)-1; i > 0; i--) + if (sigA[i] != sigA[i-1]) + break; + // Do not remove non-const sign bit + if (sigA[i].wire) + ++i; + sigA.remove(i, GetSize(sigA)-i); + sigB = port(mul, \B); + for (i = GetSize(sigB)-1; i > 0; i--) + if (sigB[i] != sigB[i-1]) + break; + // Do not remove non-const sign bit + if (sigB[i].wire) + ++i; + sigB.remove(i, GetSize(sigB)-i); + + // Only care about those bits that are used + for (i = 0; i < GetSize(O); i++) { + if (nusers(O[i]) <= 1) + break; + sigH.append(O[i]); + } + log_assert(nusers(O.extract_end(i)) <= 1); +endcode + match ffA + if mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool() select ffA->type.in($dff) - // select nusers(port(ffA, \Q)) == 2 - index <SigSpec> port(ffA, \Q) === port(mul, \A) + filter GetSize(port(ffA, \Q)) >= GetSize(sigA) + slice offset GetSize(port(ffA, \Q)) + filter offset+GetSize(sigA) <= GetSize(port(ffA, \Q)) && port(ffA, \Q).extract(offset, GetSize(sigA)) == sigA optional endmatch -code sigA clock clock_pol clock_vld - sigA = port(mul, \A); - +code sigA clock clock_pol if (ffA) { - sigA = port(ffA, \D); + for (auto b : port(ffA, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; clock = port(ffA, \CLK).as_bit(); clock_pol = param(ffA, \CLK_POLARITY).as_bool(); - clock_vld = true; + + sigA.replace(port(ffA, \Q), port(ffA, \D)); } endcode match ffB + if mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool() select ffB->type.in($dff) - // select nusers(port(ffB, \Q)) == 2 - index <SigSpec> port(ffB, \Q) === port(mul, \B) + filter GetSize(port(ffB, \Q)) >= GetSize(sigB) + slice offset GetSize(port(ffB, \Q)) + filter offset+GetSize(sigB) <= GetSize(port(ffB, \Q)) && port(ffB, \Q).extract(offset, GetSize(sigB)) == sigB optional endmatch -code sigB clock clock_pol clock_vld - sigB = port(mul, \B); - +code sigB clock clock_pol if (ffB) { - sigB = port(ffB, \D); + for (auto b : port(ffB, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; + SigBit c = port(ffB, \CLK).as_bit(); bool cp = param(ffB, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; + + sigB.replace(port(ffB, \Q), port(ffB, \D)); } endcode -match ffY - select ffY->type.in($dff) - select nusers(port(ffY, \D)) == 2 - index <SigSpec> port(ffY, \D) === port(mul, \Y) +match ffFJKG + // Ensure pipeline register is not already used + if mul->type != \SB_MAC16 || (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool()) + select ffFJKG->type.in($dff) + select nusers(port(ffFJKG, \D)) == 2 + index <SigSpec> port(ffFJKG, \D) === sigH optional endmatch -code sigY clock clock_pol clock_vld - sigY = port(mul, \Y); +code sigH sigO clock clock_pol + if (ffFJKG) { + sigH = port(ffFJKG, \Q); + for (auto b : sigH) + if (b.wire->get_bool_attribute(\keep)) + reject; - if (ffY) { - sigY = port(ffY, \Q); - SigBit c = port(ffY, \CLK).as_bit(); - bool cp = param(ffY, \CLK_POLARITY).as_bool(); + SigBit c = port(ffFJKG, \CLK).as_bit(); + bool cp = param(ffFJKG, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; } + + sigO = sigH; endcode match addA select addA->type.in($add) select nusers(port(addA, \A)) == 2 - index <SigSpec> port(addA, \A) === sigY + filter param(addA, \A_WIDTH).as_int() <= GetSize(sigH) + //index <SigSpec> port(addA, \A) === sigH.extract(0, param(addA, \A_WIDTH).as_int()) + filter port(addA, \A) == sigH.extract(0, param(addA, \A_WIDTH).as_int()) optional endmatch @@ -89,75 +139,134 @@ match addB if !addA select addB->type.in($add, $sub) select nusers(port(addB, \B)) == 2 - index <SigSpec> port(addB, \B) === sigY + filter param(addB, \B_WIDTH).as_int() <= GetSize(sigH) + //index <SigSpec> port(addB, \B) === sigH.extract(0, param(addB, \B_WIDTH).as_int()) + filter port(addB, \B) == sigH.extract(0, param(addB, \B_WIDTH).as_int()) optional endmatch -code addAB sigS +code addAB sigCD sigO cd_signed if (addA) { addAB = addA; - sigS = port(addA, \B); + sigCD = port(addAB, \B); + cd_signed = param(addAB, \B_SIGNED).as_bool(); } - if (addB) { + else if (addB) { addAB = addB; - sigS = port(addB, \A); + sigCD = port(addAB, \A); + cd_signed = param(addAB, \A_SIGNED).as_bool(); } if (addAB) { + if (mul->type == \SB_MAC16) { + // Ensure that adder is not used + if (param(mul, \TOPOUTPUT_SELECT).as_int() != 3 || + param(mul, \BOTOUTPUT_SELECT).as_int() != 3) + reject; + } + int natural_mul_width = GetSize(sigA) + GetSize(sigB); - int actual_mul_width = GetSize(sigY); - int actual_acc_width = GetSize(sigS); + int actual_mul_width = GetSize(sigH); + int actual_acc_width = GetSize(sigCD); if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) reject; - if ((actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool())) + // If accumulator, check adder width and signedness + if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool())) reject; + + sigO = port(addAB, \Y); } endcode match muxA - if addAB select muxA->type.in($mux) - select nusers(port(muxA, \A)) == 2 - index <SigSpec> port(muxA, \A) === port(addAB, \Y) + index <int> nusers(port(muxA, \A)) === 2 + index <SigSpec> port(muxA, \A) === sigO optional endmatch match muxB - if addAB if !muxA select muxB->type.in($mux) - select nusers(port(muxB, \B)) == 2 - index <SigSpec> port(muxB, \B) === port(addAB, \Y) + index <int> nusers(port(muxB, \B)) === 2 + index <SigSpec> port(muxB, \B) === sigO optional endmatch -code muxAB - muxAB = addAB; +code muxAB sigO if (muxA) muxAB = muxA; - if (muxB) + else if (muxB) muxAB = muxB; + if (muxAB) + sigO = port(muxAB, \Y); endcode -match ffS - if muxAB - select ffS->type.in($dff) - select nusers(port(ffS, \D)) == 2 - index <SigSpec> port(ffS, \D) === port(muxAB, \Y) - index <SigSpec> port(ffS, \Q) === sigS +match ffO + // Ensure that register is not already used + if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1) + // Ensure that OLOADTOP/OLOADBOT is unused or zero + if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero()) + if nusers(sigO) == 2 + select ffO->type.in($dff) + filter GetSize(port(ffO, \D)) >= GetSize(sigO) + slice offset GetSize(port(ffO, \D)) + filter offset+GetSize(sigO) <= GetSize(port(ffO, \D)) && port(ffO, \D).extract(offset, GetSize(sigO)) == sigO + optional +endmatch + +match ffO_lo + if !ffO && GetSize(sigO) > 16 + // Ensure that register is not already used + if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1) + // Ensure that OLOADTOP/OLOADBOT is unused or zero + if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero()) + if nusers(sigO.extract(0, 16)) == 2 + select ffO_lo->type.in($dff) + filter GetSize(port(ffO_lo, \D)) >= 16 + slice offset GetSize(port(ffO_lo, \D)) + filter offset+GetSize(sigO) <= GetSize(port(ffO_lo, \D)) && port(ffO_lo, \D).extract(offset, 16) == sigO.extract(0, 16) + optional endmatch -code clock clock_pol clock_vld - if (ffS) { - SigBit c = port(ffS, \CLK).as_bit(); - bool cp = param(ffS, \CLK_POLARITY).as_bool(); +code ffO clock clock_pol sigO sigCD cd_signed + if (ffO_lo) { + log_assert(!ffO); + ffO = ffO_lo; + } + if (ffO) { + for (auto b : port(ffO, \Q)) + if (b.wire->get_bool_attribute(\keep)) + reject; + + SigBit c = port(ffO, \CLK).as_bit(); + bool cp = param(ffO, \CLK_POLARITY).as_bool(); - if (clock_vld && (c != clock || cp != clock_pol)) + if (clock != SigBit() && (c != clock || cp != clock_pol)) reject; clock = c; clock_pol = cp; - clock_vld = true; + + sigO.replace(port(ffO, \D), port(ffO, \Q)); + + // Loading value into output register is not + // supported unless using accumulator + if (muxAB) { + if (sigCD != sigO) + reject; + if (muxA) + sigCD = port(muxAB, \B); + else if (muxB) + sigCD = port(muxAB, \A); + else log_abort(); + + cd_signed = addAB && param(addAB, \A_SIGNED).as_bool() && param(addAB, \B_SIGNED).as_bool(); + } } + sigCD.extend_u0(32, cd_signed); +endcode + +code accept; endcode diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index e7f95cf85..b57d26cef 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -60,6 +60,7 @@ struct PeepoptPass : public Pass { peepopt_pm pm(module, module->selected_cells()); pm.run_shiftmul(); pm.run_muldiv(); + pm.run_dffmuxext(); } } } diff --git a/passes/pmgen/peepopt_dffmuxext.pmg b/passes/pmgen/peepopt_dffmuxext.pmg new file mode 100644 index 000000000..2465d6171 --- /dev/null +++ b/passes/pmgen/peepopt_dffmuxext.pmg @@ -0,0 +1,55 @@ +pattern dffmuxext + +state <IdString> muxAB + +match dff + select dff->type == $dff + select GetSize(port(dff, \D)) > 1 +endmatch + +match mux + select mux->type == $mux + select GetSize(port(mux, \Y)) > 1 + choice <IdString> AB {\A, \B} + //select port(mux, AB)[GetSize(port(mux, \Y))-1].wire + index <SigSpec> port(mux, \Y) === port(dff, \D) + define <IdString> BA (AB == \A ? \B : \A) + index <SigSpec> port(mux, BA) === port(dff, \Q) + filter port(mux, AB)[GetSize(port(mux, \Y))-1] == port(mux, AB)[GetSize(port(mux, \Y))-2] + set muxAB AB +endmatch + +code + did_something = true; + + SigSpec &D = mux->connections_.at(muxAB); + SigSpec &Q = dff->connections_.at(\Q); + int width = GetSize(D); + + SigBit sign = D[width-1]; + bool is_signed = sign.wire; + int i; + for (i = width-1; i >= 2; i--) { + if (!is_signed) { + module->connect(Q[i], sign); + if (D[i-1] != sign) + break; + } + else { + module->connect(Q[i], Q[i-1]); + if (D[i-2] != sign) + break; + } + } + + mux->connections_.at(\A).remove(i, width-i); + mux->connections_.at(\B).remove(i, width-i); + mux->connections_.at(\Y).remove(i, width-i); + mux->fixup_parameters(); + dff->connections_.at(\D).remove(i, width-i); + dff->connections_.at(\Q).remove(i, width-i); + dff->fixup_parameters(); + + log("dffmuxext pattern in %s: dff=%s, mux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(mux), width-i); + accept; +endcode diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc new file mode 100644 index 000000000..5d50c7795 --- /dev/null +++ b/passes/pmgen/xilinx_dsp.cc @@ -0,0 +1,593 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#include "passes/pmgen/xilinx_dsp_pm.h" + +static Cell* addDsp(Module *module) { + Cell *cell = module->addCell(NEW_ID, "\\DSP48E1"); + cell->setParam("\\ACASCREG", 0); + cell->setParam("\\ADREG", 0); + cell->setParam("\\A_INPUT", Const("DIRECT")); + cell->setParam("\\ALUMODEREG", 0); + cell->setParam("\\AREG", 0); + cell->setParam("\\BCASCREG", 0); + cell->setParam("\\B_INPUT", Const("DIRECT")); + cell->setParam("\\BREG", 0); + cell->setParam("\\CARRYINREG", 0); + cell->setParam("\\CARRYINSELREG", 0); + cell->setParam("\\CREG", 0); + cell->setParam("\\DREG", 0); + cell->setParam("\\INMODEREG", 0); + cell->setParam("\\MREG", 0); + cell->setParam("\\OPMODEREG", 0); + cell->setParam("\\PREG", 0); + cell->setParam("\\USE_MULT", Const("NONE")); + + cell->setPort("\\D", Const(0, 24)); + cell->setPort("\\INMODE", Const(0, 5)); + cell->setPort("\\ALUMODE", Const(0, 4)); + cell->setPort("\\OPMODE", Const(0, 7)); + cell->setPort("\\CARRYINSEL", Const(0, 3)); + cell->setPort("\\ACIN", Const(0, 30)); + cell->setPort("\\BCIN", Const(0, 18)); + cell->setPort("\\PCIN", Const(0, 48)); + cell->setPort("\\CARRYIN", Const(0, 1)); + return cell; +} + +void pack_xilinx_simd(Module *module, const std::vector<Cell*> &selected_cells) +{ + std::deque<Cell*> simd12_add, simd12_sub; + std::deque<Cell*> simd24_add, simd24_sub; + + for (auto cell : selected_cells) { + if (!cell->type.in("$add", "$sub")) + continue; + SigSpec Y = cell->getPort("\\Y"); + if (!Y.is_chunk()) + continue; + if (!Y.as_chunk().wire->get_strpool_attribute("\\use_dsp").count("simd")) + continue; + if (GetSize(Y) > 25) + continue; + SigSpec A = cell->getPort("\\A"); + SigSpec B = cell->getPort("\\B"); + if (GetSize(Y) <= 13) { + if (GetSize(A) > 12) + continue; + if (GetSize(B) > 12) + continue; + if (cell->type == "$add") + simd12_add.push_back(cell); + else if (cell->type == "$sub") + simd12_sub.push_back(cell); + } + else if (GetSize(Y) <= 25) { + if (GetSize(A) > 24) + continue; + if (GetSize(B) > 24) + continue; + if (cell->type == "$add") + simd24_add.push_back(cell); + else if (cell->type == "$sub") + simd24_sub.push_back(cell); + } + else + log_abort(); + } + + auto f12 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort("\\A"); + SigSpec B = lane->getPort("\\B"); + SigSpec Y = lane->getPort("\\Y"); + A.extend_u0(12, lane->getParam("\\A_SIGNED").as_bool()); + B.extend_u0(12, lane->getParam("\\B_SIGNED").as_bool()); + AB.append(A); + C.append(B); + if (GetSize(Y) < 13) + Y.append(module->addWire(NEW_ID, 13-GetSize(Y))); + else + log_assert(GetSize(Y) == 13); + P.append(Y.extract(0, 12)); + CARRYOUT.append(Y[12]); + }; + auto g12 = [&f12,module](std::deque<Cell*> &simd12) { + while (simd12.size() > 1) { + SigSpec AB, C, P, CARRYOUT; + + Cell *lane1 = simd12.front(); + simd12.pop_front(); + Cell *lane2 = simd12.front(); + simd12.pop_front(); + Cell *lane3 = nullptr; + Cell *lane4 = nullptr; + + if (!simd12.empty()) { + lane3 = simd12.front(); + simd12.pop_front(); + if (!simd12.empty()) { + lane4 = simd12.front(); + simd12.pop_front(); + } + } + + log("Analysing %s.%s for Xilinx DSP SIMD12 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam("\\USE_SIMD", Const("FOUR12")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort("\\OPMODE", Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f12(AB, C, P, CARRYOUT, lane1); + f12(AB, C, P, CARRYOUT, lane2); + if (lane3) { + f12(AB, C, P, CARRYOUT, lane3); + if (lane4) + f12(AB, C, P, CARRYOUT, lane4); + else { + AB.append(Const(0, 12)); + C.append(Const(0, 12)); + P.append(module->addWire(NEW_ID, 12)); + CARRYOUT.append(module->addWire(NEW_ID, 1)); + } + } + else { + AB.append(Const(0, 24)); + C.append(Const(0, 24)); + P.append(module->addWire(NEW_ID, 24)); + CARRYOUT.append(module->addWire(NEW_ID, 2)); + } + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort("\\A", AB.extract(18, 30)); + cell->setPort("\\B", AB.extract(0, 18)); + cell->setPort("\\C", C); + cell->setPort("\\P", P); + cell->setPort("\\CARRYOUT", CARRYOUT); + if (lane1->type == "$sub") + cell->setPort("\\ALUMODE", Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + if (lane3) module->remove(lane3); + if (lane4) module->remove(lane4); + + module->design->select(module, cell); + } + }; + g12(simd12_add); + g12(simd12_sub); + + auto f24 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort("\\A"); + SigSpec B = lane->getPort("\\B"); + SigSpec Y = lane->getPort("\\Y"); + A.extend_u0(24, lane->getParam("\\A_SIGNED").as_bool()); + B.extend_u0(24, lane->getParam("\\B_SIGNED").as_bool()); + C.append(A); + AB.append(B); + if (GetSize(Y) < 25) + Y.append(module->addWire(NEW_ID, 25-GetSize(Y))); + else + log_assert(GetSize(Y) == 25); + P.append(Y.extract(0, 24)); + CARRYOUT.append(module->addWire(NEW_ID)); // TWO24 uses every other bit + CARRYOUT.append(Y[24]); + }; + auto g24 = [&f24,module](std::deque<Cell*> &simd24) { + while (simd24.size() > 1) { + SigSpec AB; + SigSpec C; + SigSpec P; + SigSpec CARRYOUT; + + Cell *lane1 = simd24.front(); + simd24.pop_front(); + Cell *lane2 = simd24.front(); + simd24.pop_front(); + + log("Analysing %s.%s for Xilinx DSP SIMD24 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam("\\USE_SIMD", Const("TWO24")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort("\\OPMODE", Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f24(AB, C, P, CARRYOUT, lane1); + f24(AB, C, P, CARRYOUT, lane2); + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort("\\A", AB.extract(18, 30)); + cell->setPort("\\B", AB.extract(0, 18)); + cell->setPort("\\C", C); + cell->setPort("\\P", P); + cell->setPort("\\CARRYOUT", CARRYOUT); + if (lane1->type == "$sub") + cell->setPort("\\ALUMODE", Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + + module->design->select(module, cell); + } + }; + g24(simd24_add); + g24(simd24_sub); +} + + +void pack_xilinx_dsp(dict<SigBit, Cell*> &bit_to_driver, xilinx_dsp_pm &pm) +{ + auto &st = pm.st_xilinx_dsp; + +#if 1 + log("\n"); + log("preAdd: %s\n", log_id(st.preAdd, "--")); + log("ffAD: %s\n", log_id(st.ffAD, "--")); + log("ffADmux: %s\n", log_id(st.ffADmux, "--")); + log("ffA: %s\n", log_id(st.ffA, "--")); + log("ffAmux: %s\n", log_id(st.ffAmux, "--")); + log("ffB: %s\n", log_id(st.ffB, "--")); + log("ffBmux: %s\n", log_id(st.ffBmux, "--")); + log("ffC: %s\n", log_id(st.ffC, "--")); + log("ffCmux: %s\n", log_id(st.ffCmux, "--")); + log("ffD: %s\n", log_id(st.ffD, "--")); + log("ffDmux: %s\n", log_id(st.ffDmux, "--")); + log("dsp: %s\n", log_id(st.dsp, "--")); + log("ffM: %s\n", log_id(st.ffM, "--")); + log("ffMmux: %s\n", log_id(st.ffMmux, "--")); + log("postAdd: %s\n", log_id(st.postAdd, "--")); + log("postAddMux: %s\n", log_id(st.postAddMux, "--")); + log("ffP: %s\n", log_id(st.ffP, "--")); + log("ffPcemux: %s\n", log_id(st.ffPcemux, "--")); + log("ffPrstmux: %s\n", log_id(st.ffPrstmux, "--")); +#endif + + log("Analysing %s.%s for Xilinx DSP packing.\n", log_id(pm.module), log_id(st.dsp)); + + Cell *cell = st.dsp; + bit_to_driver.insert(std::make_pair(cell->getPort("\\P")[17], cell)); + SigSpec P = st.sigP; + + if (st.preAdd) { + log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type)); + bool A_SIGNED = st.preAdd->getParam("\\A_SIGNED").as_bool(); + bool D_SIGNED = st.preAdd->getParam("\\B_SIGNED").as_bool(); + if (st.sigA == st.preAdd->getPort("\\B")) + std::swap(A_SIGNED, D_SIGNED); + st.sigA.extend_u0(30, A_SIGNED); + st.sigD.extend_u0(25, D_SIGNED); + cell->setPort("\\A", st.sigA); + cell->setPort("\\D", st.sigD); + cell->connections_.at("\\INMODE") = Const::from_string("00100"); + + if (st.ffAD) { + if (st.ffADmux) { + SigSpec S = st.ffADmux->getPort("\\S"); + cell->setPort("\\CEAD", st.ffADcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CEAD", State::S1); + cell->setParam("\\ADREG", 1); + } + + cell->setParam("\\USE_DPORT", Const("TRUE")); + + pm.autoremove(st.preAdd); + } + if (st.postAdd) { + log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type)); + + SigSpec &opmode = cell->connections_.at("\\OPMODE"); + if (st.postAddMux) { + log_assert(st.ffP); + opmode[4] = st.postAddMux->getPort("\\S"); + pm.autoremove(st.postAddMux); + } + else if (st.ffP && st.sigC == P) + opmode[4] = State::S0; + else + opmode[4] = State::S1; + opmode[6] = State::S0; + opmode[5] = State::S1; + + if (opmode[4] != State::S0) { + if (st.postAddMuxAB == "\\A") + st.sigC.extend_u0(48, st.postAdd->getParam("\\B_SIGNED").as_bool()); + else + st.sigC.extend_u0(48, st.postAdd->getParam("\\A_SIGNED").as_bool()); + cell->setPort("\\C", st.sigC); + } + + pm.autoremove(st.postAdd); + } + + if (st.clock != SigBit()) + { + cell->setPort("\\CLK", st.clock); + + if (st.ffA) { + SigSpec A = cell->getPort("\\A"); + SigSpec D = st.ffA->getPort("\\D"); + SigSpec Q = pm.sigmap(st.ffA->getPort("\\Q")); + A.replace(Q, D); + if (st.ffAmux) { + SigSpec Y = st.ffAmux->getPort("\\Y"); + SigSpec AB = st.ffAmux->getPort(st.ffAcepol ? "\\B" : "\\A"); + SigSpec S = st.ffAmux->getPort("\\S"); + A.replace(Y, AB); + cell->setPort("\\CEA2", st.ffAcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CEA2", State::S1); + cell->setPort("\\A", A); + + cell->setParam("\\AREG", 1); + } + if (st.ffB) { + SigSpec B = cell->getPort("\\B"); + SigSpec D = st.ffB->getPort("\\D"); + SigSpec Q = st.ffB->getPort("\\Q"); + B.replace(Q, D); + if (st.ffBmux) { + SigSpec Y = st.ffBmux->getPort("\\Y"); + SigSpec AB = st.ffBmux->getPort(st.ffBcepol ? "\\B" : "\\A"); + SigSpec S = st.ffBmux->getPort("\\S"); + B.replace(Y, AB); + cell->setPort("\\CEB2", st.ffBcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CEB2", State::S1); + cell->setPort("\\B", B); + + cell->setParam("\\BREG", 1); + } + if (st.ffC) { + SigSpec C = cell->getPort("\\C"); + SigSpec D = st.ffC->getPort("\\D"); + SigSpec Q = st.ffC->getPort("\\Q"); + C.replace(Q, D); + + if (st.ffCmux) { + SigSpec Y = st.ffCmux->getPort("\\Y"); + SigSpec AB = st.ffCmux->getPort(st.ffCcepol ? "\\B" : "\\A"); + SigSpec S = st.ffCmux->getPort("\\S"); + C.replace(Y, AB); + + cell->setPort("\\CEC", st.ffCcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CEC", State::S1); + cell->setPort("\\C", C); + + cell->setParam("\\CREG", 1); + } + if (st.ffD) { + SigSpec D_ = cell->getPort("\\D"); + SigSpec D = st.ffD->getPort("\\D"); + SigSpec Q = st.ffD->getPort("\\Q"); + D_.replace(Q, D); + + if (st.ffDmux) { + SigSpec Y = st.ffDmux->getPort("\\Y"); + SigSpec AB = st.ffDmux->getPort(st.ffDcepol ? "\\B" : "\\A"); + SigSpec S = st.ffDmux->getPort("\\S"); + D_.replace(Y, AB); + + cell->setPort("\\CED", st.ffDcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort("\\CED", State::S1); + cell->setPort("\\D", D_); + + cell->setParam("\\DREG", 1); + } + if (st.ffM) { + if (st.ffMmux) { + SigSpec S = st.ffMmux->getPort("\\S"); + cell->setPort("\\CEM", st.ffMcepol ? S : pm.module->Not(NEW_ID, S)); + pm.autoremove(st.ffMmux); + } + else + cell->setPort("\\CEM", State::S1); + SigSpec D = st.ffM->getPort("\\D"); + SigSpec Q = st.ffM->getPort("\\Q"); + P.replace(pm.sigmap(D), Q); + + cell->setParam("\\MREG", State::S1); + pm.autoremove(st.ffM); + } + if (st.ffP) { + if (st.ffPrstmux) { + SigSpec S = st.ffPrstmux->getPort("\\S"); + cell->setPort("\\RSTP", st.ffPrstpol ? S : pm.module->Not(NEW_ID, S)); + st.ffPrstmux->connections_.at("\\Y").replace(P, pm.module->addWire(NEW_ID, GetSize(P))); + } + else + cell->setPort("\\RSTP", State::S0); + if (st.ffPcemux) { + SigSpec S = st.ffPcemux->getPort("\\S"); + cell->setPort("\\CEP", st.ffPcepol ? S : pm.module->Not(NEW_ID, S)); + st.ffPcemux->connections_.at("\\Y").replace(P, pm.module->addWire(NEW_ID, GetSize(P))); + } + else + cell->setPort("\\CEP", State::S1); + SigSpec D = st.ffP->getPort("\\D"); + SigSpec Q = st.ffP->getPort("\\Q"); + P.replace(pm.sigmap(D), Q); + st.ffP->connections_.at("\\Q").replace(P, pm.module->addWire(NEW_ID, GetSize(P))); + + for (auto c : Q.chunks()) { + auto it = c.wire->attributes.find("\\init"); + if (it == c.wire->attributes.end()) + continue; + for (int i = c.offset; i < c.offset+c.width; i++) { + log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); + it->second[i] = State::Sx; + } + } + + cell->setParam("\\PREG", State::S1); + } + + log(" clock: %s (%s)", log_signal(st.clock), "posedge"); + + if (st.ffA) + log(" ffA:%s", log_id(st.ffA)); + + if (st.ffAD) + log(" ffAD:%s", log_id(st.ffAD)); + + if (st.ffB) + log(" ffB:%s", log_id(st.ffB)); + + if (st.ffC) + log(" ffC:%s", log_id(st.ffC)); + + if (st.ffD) + log(" ffD:%s", log_id(st.ffD)); + + if (st.ffM) + log(" ffM:%s", log_id(st.ffM)); + + if (st.ffP) + log(" ffP:%s", log_id(st.ffP)); + + log("\n"); + } + + if (GetSize(P) < 48) + P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); + cell->setPort("\\P", P); + + pm.blacklist(cell); +} + +struct XilinxDspPass : public Pass { + XilinxDspPass() : Pass("xilinx_dsp", "Xilinx: pack resources into DSPs") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" xilinx_dsp [options] [selection]\n"); + log("\n"); + log("Pack input registers (A, B, C, D, AD; with optional enable), pipeline registers\n"); + log("(M; with optional enable), output registers (P; with optional enable),\n"); + log("pre-adder and/or post-adder into Xilinx DSP resources.\n"); + log("\n"); + log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n"); + log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n"); + log("used to override the existing accumulation result with a new value.\n"); + log("\n"); + log("Use of the dedicated 'PCOUT' -> 'PCIN' path is detected for 'P' -> 'C' connections\n"); + log("where 'P' is right-shifted by 18-bits and used as an input to the post-adder (a\n"); + log("pattern common for summing partial products to implement wide multiplies).\n"); + log("\n"); + log("Not currently supported: reset (RST*) inputs on any register.\n"); + log("\n"); + log("\n"); + log("Experimental feature: addition/subtractions less than 12 or 24 bits with the\n"); + log("'(* use_dsp=\"simd\" *)' attribute attached to the output wire or attached to\n"); + log("the add/subtract operator will cause those operations to be implemented using\n"); + log("the 'SIMD' feature of DSPs.\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + // if (args[argidx] == "-singleton") { + // singleton_mode = true; + // continue; + // } + break; + } + extra_args(args, argidx, design); + + for (auto module : design->selected_modules()) { + pack_xilinx_simd(module, module->selected_cells()); + + xilinx_dsp_pm pm(module, module->selected_cells()); + dict<SigBit, Cell*> bit_to_driver; + auto f = [&bit_to_driver](xilinx_dsp_pm &pm){ pack_xilinx_dsp(bit_to_driver, pm); }; + pm.run_xilinx_dsp(f); + + // Look for ability to convert C input from another DSP into PCIN + // NB: Needs to be done after pattern matcher has folded all + // $add cells into the DSP + for (auto cell : module->cells()) { + if (cell->type != "\\DSP48E1") + continue; + if (cell->parameters.at("\\CREG", State::S1).as_bool()) + continue; + SigSpec &opmode = cell->connections_.at("\\OPMODE"); + if (opmode.extract(4,3) != Const::from_string("011")) + continue; + SigSpec C = pm.sigmap(cell->getPort("\\C")); + if (C.has_const()) + continue; + auto it = bit_to_driver.find(C[0]); + if (it == bit_to_driver.end()) + continue; + auto driver = it->second; + + // Unextend C + int i; + for (i = GetSize(C)-1; i > 0; i--) + if (C[i] != C[i-1]) + break; + if (i > 48-17) + continue; + if (driver->getPort("\\P").extract(17, i) == C.extract(0, i)) { + cell->setPort("\\C", Const(0, 48)); + Wire *cascade = module->addWire(NEW_ID, 48); + driver->setPort("\\PCOUT", cascade); + cell->setPort("\\PCIN", cascade); + opmode[6] = State::S1; + opmode[5] = State::S0; + opmode[4] = State::S1; + bit_to_driver.erase(it); + } + } + } + } +} XilinxDspPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg new file mode 100644 index 000000000..7db8e95a6 --- /dev/null +++ b/passes/pmgen/xilinx_dsp.pmg @@ -0,0 +1,467 @@ +pattern xilinx_dsp + +state <std::function<SigSpec(const SigSpec&)>> unextend +state <SigBit> clock +state <SigSpec> sigA sigffAmuxY sigB sigffBmuxY sigC sigffCmuxY sigD sigffDmuxY sigM sigP +state <IdString> postAddAB postAddMuxAB +state <bool> ffAcepol ffADcepol ffBcepol ffCcepol ffDcepol ffMcepol ffPcepol ffPrstpol +state <int> ffPoffset + +state <Cell*> ffAD ffADmux ffA ffAmux ffB ffBmux ffC ffCmux ffD ffDmux ffM ffMmux ffP ffPcemux ffPrstmux + +// subpattern +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +match dsp + select dsp->type.in(\DSP48E1) +endmatch + +code unextend sigA sigB sigC sigD sigM + unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigA = unextend(port(dsp, \A)); + sigB = unextend(port(dsp, \B)); + + sigC = dsp->connections_.at(\C, SigSpec()); + sigD = dsp->connections_.at(\D, SigSpec()); + + SigSpec P = port(dsp, \P); + if (dsp->parameters.at(\USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { + // Only care about those bits that are used + int i; + for (i = 0; i < GetSize(P); i++) { + if (nusers(P[i]) <= 1) + break; + sigM.append(P[i]); + } + log_assert(nusers(P.extract_end(i)) <= 1); + } + else + sigM = P; +endcode + +code argQ ffAD ffADmux ffADcepol sigA clock + if (param(dsp, \ADREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffAD = dff; + clock = dffclock; + if (dffcemux) { + ffADmux = dffcemux; + ffADcepol = dffcepol; + } + sigA = dffD; + } + } +endcode + +match preAdd + if sigD.empty() || sigD.is_fully_zero() + // Ensure that preAdder not already used + if dsp->parameters.at(\USE_DPORT, Const("FALSE")).decode_string() == "FALSE" + if dsp->connections_.at(\INMODE, Const(0, 5)).is_fully_zero() + + select preAdd->type.in($add) + // Output has to be 25 bits or less + select GetSize(port(preAdd, \Y)) <= 25 + select nusers(port(preAdd, \Y)) == 2 + choice <IdString> AB {\A, \B} + // A port has to be 30 bits or less + select GetSize(port(preAdd, AB)) <= 30 + define <IdString> BA (AB == \A ? \B : \A) + // D port has to be 25 bits or less + select GetSize(port(preAdd, BA)) <= 25 + index <SigSpec> port(preAdd, \Y) === sigA + + optional +endmatch + +code sigA sigD + if (preAdd) { + sigA = port(preAdd, \A); + sigD = port(preAdd, \B); + if (GetSize(sigA) < GetSize(sigD)) + std::swap(sigA, sigD); + } +endcode + +code argQ ffA ffAmux ffAcepol sigA clock ffAD ffADmux ffADcepol + // Only search for ffA if there was a pre-adder + // (otherwise ffA would have been matched as ffAD) + if (preAdd) { + if (param(dsp, \AREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffA = dff; + clock = dffclock; + if (dffcemux) { + ffAmux = dffcemux; + ffAcepol = dffcepol; + } + sigA = dffD; + } + } + } + // And if there wasn't a pre-adder, + // move AD register to A + else if (ffAD) { + log_assert(!ffA && !ffAmux); + std::swap(ffA, ffAD); + std::swap(ffAmux, ffADmux); + ffAcepol = ffADcepol; + } +endcode + +code argQ ffB ffBmux ffBcepol sigB clock + if (param(dsp, \BREG).as_int() == 0) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + ffB = dff; + clock = dffclock; + if (dffcemux) { + ffBmux = dffcemux; + ffBcepol = dffcepol; + } + sigB = dffD; + } + } +endcode + +code argQ ffD ffDmux ffDcepol sigD clock + if (param(dsp, \DREG).as_int() == 0) { + argQ = sigD; + subpattern(in_dffe); + if (dff) { + ffD = dff; + clock = dffclock; + if (dffcemux) { + ffDmux = dffcemux; + ffDcepol = dffcepol; + } + sigD = dffD; + } + } +endcode + +code argD ffM ffMmux ffMcepol sigM sigP clock + if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { + argD = sigM; + subpattern(out_dffe); + if (dff) { + ffM = dff; + clock = dffclock; + if (dffcemux) { + ffMmux = dffcemux; + ffMcepol = dffcepol; + } + sigM = dffQ; + } + } + sigP = sigM; +endcode + +match postAdd + // Ensure that Z mux is not already used + if port(dsp, \OPMODE).extract(4,3).is_fully_zero() + + select postAdd->type.in($add) + select GetSize(port(postAdd, \Y)) <= 48 + select nusers(port(postAdd, \Y)) == 2 + choice <IdString> AB {\A, \B} + select nusers(port(postAdd, AB)) <= 3 + filter ffMmux || nusers(port(postAdd, AB)) == 2 + filter !ffMmux || nusers(port(postAdd, AB)) == 3 + filter GetSize(unextend(port(postAdd, AB))) <= GetSize(sigP) + filter unextend(port(postAdd, AB)) == sigP.extract(0, GetSize(unextend(port(postAdd, AB)))) + filter nusers(sigP.extract_end(GetSize(unextend(port(postAdd, AB))))) <= 1 + set postAddAB AB + optional +endmatch + +code sigC sigP + if (postAdd) { + sigC = port(postAdd, postAddAB == \A ? \B : \A); + + // TODO for DSP48E1, which will have sign extended inputs/outputs + //int natural_mul_width = GetSize(port(dsp, \A)) + GetSize(port(dsp, \B)); + //int actual_mul_width = GetSize(sigP); + //int actual_acc_width = GetSize(sigC); + + //if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) + // reject; + //if ((actual_acc_width != actual_mul_width) && (param(dsp, \A_SIGNED).as_bool() != param(postAdd, \A_SIGNED).as_bool())) + // reject; + + sigP = port(postAdd, \Y); + } +endcode + +code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock + if (param(dsp, \PREG).as_int() == 0) { + // If ffMmux and no postAdd new-value net must have exactly three users: ffMmux, ffM and ffPcemux + if ((ffMmux && !postAdd && nusers(sigP) == 3) || + // Otherwise new-value net must have exactly two users: dsp and ffPcemux + ((!ffMmux || postAdd) && nusers(sigP) == 2)) { + argD = sigP; + subpattern(out_dffe); + if (dff) { + ffP = dff; + clock = dffclock; + if (dffcemux) { + ffPcemux = dffcemux; + ffPcepol = dffcepol; + ffPrstmux = dffrstmux; + ffPrstpol = dffrstpol; + } + sigP = dffQ; + } + } + } +endcode + +match postAddMux + if postAdd + if ffP + select postAddMux->type.in($mux) + select nusers(port(postAddMux, \Y)) == 2 + choice <IdString> AB {\A, \B} + index <SigSpec> port(postAddMux, AB) === sigP + index <SigSpec> port(postAddMux, \Y) === sigC + set postAddMuxAB AB + optional +endmatch + +code sigC + if (postAddMux) + sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); +endcode + +code argQ ffC ffCmux ffCcepol sigC clock + if (param(dsp, \CREG).as_int() == 0) { + argQ = sigC; + subpattern(in_dffe); + if (dff) { + ffC = dff; + clock = dffclock; + if (dffcemux) { + ffCmux = dffcemux; + ffCcepol = dffcepol; + } + sigC = dffD; + } + } +endcode + +code + accept; +endcode + +// ####################### + +subpattern in_dffe +arg argQ clock ffcepol + +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + filter GetSize(port(ff, \Q)) >= GetSize(argQ) + slice offset GetSize(port(ff, \Q)) + filter offset+GetSize(argQ) <= GetSize(port(ff, \Q)) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + semioptional +endmatch + +code argQ + if (ff) { + for (auto c : argQ.chunks()) + if (c.wire->get_bool_attribute(\keep)) + reject; + + if (clock != SigBit()) { + if (port(ff, \CLK) != clock) + reject; + } + dffclock = port(ff, \CLK); + + dff = ff; + dffD = argQ; + dffD.replace(port(ff, \Q), port(ff, \D)); + // Only search for ffcemux if argQ has at + // least 3 users (ff, <upstream>, ffcemux) and + // its ff.D only has two (ff, ffcemux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argQ = SigSpec(); + } + else { + dff = nullptr; + argQ = SigSpec(); + } +endcode + +match ffcemux + if !argQ.empty() + select ffcemux->type.in($mux) + index <SigSpec> port(ffcemux, \Y) === port(ff, \D) + filter GetSize(port(ffcemux, \Y)) >= GetSize(dffD) + slice offset GetSize(port(ffcemux, \Y)) + filter offset+GetSize(dffD) <= GetSize(port(ffcemux, \Y)) + filter port(ffcemux, \Y).extract(offset, GetSize(dffD)) == dffD + choice <IdString> AB {\A, \B} + filter offset+GetSize(argQ) <= GetSize(port(ffcemux, \Y)) + filter port(ffcemux, AB).extract(offset, GetSize(argQ)) == argQ + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + dffD = port(ffcemux, dffcepol ? \B : \A); + } + else + dffcemux = nullptr; +endcode + +// ####################### + +subpattern out_dffe +arg argD argQ clock +arg unextend + +match ffcemux + select ffcemux->type.in($mux) + // ffcemux output must have two users: ffcemux and ff.D + select nusers(port(ffcemux, \Y)) == 2 + filter GetSize(port(ffcemux, \Y)) >= GetSize(argD) + + choice <IdString> BA {\B, \A} + // new-value net must have exactly two users: (upstream) and ffcemux + select nusers(port(ffcemux, BA)) == 2 + + define <IdString> AB (BA == \B ? \A : \B) + // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) + select nusers(port(ffcemux, AB)) >= 3 + + slice offset GetSize(port(ffcemux, \Y)) + filter GetSize(unextend(port(ffcemux, BA))) <= GetSize(argD) + filter unextend(port(ffcemux, BA)) == argD.extract(0, GetSize(unextend(port(ffcemux, BA)))) + // Remaining bits on argD must not have any other users + filter nusers(argD.extract_end(GetSize(unextend(port(ffcemux, BA))))) <= 1 + + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD argQ + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, \Y); + argQ = port(ffcemux, ffcepol ? \A : \B); + } + else + dffcemux = nullptr; +endcode + +match ffrstmux + if !argQ.empty() + select ffrstmux->type.in($mux) + // ffrstmux output must have two users: ffrstmux and ff.D + select nusers(port(ffrstmux, \Y)) == 2 + filter GetSize(port(ffrstmux, \Y)) >= GetSize(argD) + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <IdString> AB (BA == \B ? \A : \B) + // keep-last-value net must have exactly 2 users: ffrstmux, ffcemux/<upstream> + select nusers(port(ffrstmux, AB)) == 2 + + slice offset GetSize(port(ffrstmux, \Y)) + filter GetSize(port(ffrstmux, AB)) <= GetSize(argD) + filter port(ffrstmux, AB) == argD.extract(0, GetSize(port(ffrstmux, AB))) + // Remaining bits on argD must not have any other users + filter nusers(argD.extract_end(GetSize(port(ffrstmux, AB)))) <= 1 + + define <bool> pol (AB == \A) + set ffrstpol pol + semioptional +endmatch + +code argD argQ + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, \Y); + } + else { + dffrstmux = nullptr; + argQ = SigSpec(); + } +endcode + +match ff_enable + if !argQ.empty() + select ff_enable->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff_enable, \CLK_POLARITY).as_bool() + index <SigSpec> port(ff_enable, \D) === argD + index <SigSpec> port(ff_enable, \Q) === argQ +endmatch + +match ff + if !ff_enable + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + index <SigSpec> port(ff, \D) === argD + semioptional +endmatch + +code + if (ff_enable) + dff = ff_enable; + else + dff = ff; + if (dff) { + dffQ = port(dff, \Q); + + for (auto c : dffQ.chunks()) { + if (c.wire->get_bool_attribute(\keep)) + reject; + Const init = c.wire->attributes.at(\init, State::Sx); + if (!init.is_fully_undef() && !init.is_fully_zero()) + reject; + } + + if (clock != SigBit()) { + if (port(dff, \CLK) != clock) + reject; + } + dffclock = port(dff, \CLK); + } + // No enable/reset mux possible without flop + else if (ffcemux || ffrstmux) + reject; +endcode diff --git a/passes/tests/test_autotb.cc b/passes/tests/test_autotb.cc index bfb1d6642..2b6a86c25 100644 --- a/passes/tests/test_autotb.cc +++ b/passes/tests/test_autotb.cc @@ -345,9 +345,17 @@ struct TestAutotbBackend : public Backend { log("value after initialization. This can e.g. be used to force a reset signal\n"); log("low in order to explore more inner states in a state machine.\n"); log("\n"); + log("The attribute 'gentb_skip' can be attached to modules to suppress testbench\n"); + log("generation.\n"); + log("\n"); log(" -n <int>\n"); log(" number of iterations the test bench should run (default = 1000)\n"); log("\n"); + log(" -seed <int>\n"); + log(" seed used for pseudo-random number generation (default = 0).\n"); + log(" a value of 0 will cause an arbitrary seed to be chosen, based on\n"); + log(" the current system time.\n"); + log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { diff --git a/techlibs/common/Makefile.inc b/techlibs/common/Makefile.inc index de94798af..6c0a4fe66 100644 --- a/techlibs/common/Makefile.inc +++ b/techlibs/common/Makefile.inc @@ -28,4 +28,5 @@ $(eval $(call add_share_file,share,techlibs/common/dff2ff.v)) $(eval $(call add_share_file,share,techlibs/common/gate2lut.v)) $(eval $(call add_share_file,share,techlibs/common/cmp2lut.v)) $(eval $(call add_share_file,share,techlibs/common/cells.lib)) +$(eval $(call add_share_file,share,techlibs/common/mul2dsp.v)) $(eval $(call add_share_file,share,techlibs/common/dummy.box)) diff --git a/techlibs/common/mul2dsp.v b/techlibs/common/mul2dsp.v new file mode 100644 index 000000000..f2b44222e --- /dev/null +++ b/techlibs/common/mul2dsp.v @@ -0,0 +1,326 @@ +/*
+ * yosys -- Yosys Open SYnthesis Suite
+ *
+ * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at>
+ * 2019 Eddie Hung <eddie@fpgeh.com>
+ * 2019 David Shah <dave@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * ---
+ *
+ * Tech-mapping rules for decomposing arbitrarily-sized $mul cells
+ * into an equivalent collection of smaller `DSP_NAME cells (with the
+ * same interface as $mul) no larger than `DSP_[AB]_MAXWIDTH, attached
+ * to $shl and $add cells.
+ *
+ */
+
+`ifndef DSP_A_MAXWIDTH
+$fatal(1, "Macro DSP_A_MAXWIDTH must be defined");
+`endif
+`ifndef DSP_B_MAXWIDTH
+$fatal(1, "Macro DSP_B_MAXWIDTH must be defined");
+`endif
+`ifndef DSP_B_MAXWIDTH
+$fatal(1, "Macro DSP_B_MAXWIDTH must be defined");
+`endif
+`ifndef DSP_A_MAXWIDTH_PARTIAL
+`define DSP_A_MAXWIDTH_PARTIAL `DSP_A_MAXWIDTH
+`endif
+`ifndef DSP_B_MAXWIDTH_PARTIAL
+`define DSP_B_MAXWIDTH_PARTIAL `DSP_B_MAXWIDTH
+`endif
+
+`ifndef DSP_NAME
+$fatal(1, "Macro DSP_NAME must be defined");
+`endif
+
+`define MAX(a,b) (a > b ? a : b)
+`define MIN(a,b) (a < b ? a : b)
+
+(* techmap_celltype = "$mul $__mul" *)
+module _80_mul (A, B, Y);
+ parameter A_SIGNED = 0;
+ parameter B_SIGNED = 0;
+ parameter A_WIDTH = 1;
+ parameter B_WIDTH = 1;
+ parameter Y_WIDTH = 1;
+
+ input [A_WIDTH-1:0] A;
+ input [B_WIDTH-1:0] B;
+ output [Y_WIDTH-1:0] Y;
+
+ parameter _TECHMAP_CELLTYPE_ = "";
+
+ generate
+ if (0) begin end
+`ifdef DSP_A_MINWIDTH
+ else if (A_WIDTH < `DSP_A_MINWIDTH)
+ wire _TECHMAP_FAIL_ = 1;
+`endif
+`ifdef DSP_B_MINWIDTH
+ else if (B_WIDTH < `DSP_B_MINWIDTH)
+ wire _TECHMAP_FAIL_ = 1;
+`endif
+`ifdef DSP_Y_MINWIDTH
+ else if (Y_WIDTH < `DSP_Y_MINWIDTH)
+ wire _TECHMAP_FAIL_ = 1;
+`endif
+ else if (_TECHMAP_CELLTYPE_ == "$mul" && A_SIGNED != B_SIGNED)
+ wire _TECHMAP_FAIL_ = 1;
+`ifdef DSP_SIGNEDONLY
+ else if (_TECHMAP_CELLTYPE_ == "$mul" && !A_SIGNED)
+ \$mul #(
+ .A_SIGNED(1),
+ .B_SIGNED(1),
+ .A_WIDTH(A_WIDTH + 1),
+ .B_WIDTH(B_WIDTH + 1),
+ .Y_WIDTH(Y_WIDTH)
+ ) _TECHMAP_REPLACE_ (
+ .A({1'b0, A}),
+ .B({1'b0, B}),
+ .Y(Y)
+ );
+`endif
+ else if (_TECHMAP_CELLTYPE_ == "$mul" && A_WIDTH < B_WIDTH)
+ \$mul #(
+ .A_SIGNED(B_SIGNED),
+ .B_SIGNED(A_SIGNED),
+ .A_WIDTH(B_WIDTH),
+ .B_WIDTH(A_WIDTH),
+ .Y_WIDTH(Y_WIDTH)
+ ) _TECHMAP_REPLACE_ (
+ .A(B),
+ .B(A),
+ .Y(Y)
+ );
+ else begin
+ wire [1023:0] _TECHMAP_DO_ = "proc; clean";
+
+`ifdef DSP_SIGNEDONLY
+ localparam sign_headroom = 1;
+`else
+ localparam sign_headroom = 0;
+`endif
+
+ genvar i;
+ if (A_WIDTH > `DSP_A_MAXWIDTH) begin
+ localparam n = (A_WIDTH-`DSP_A_MAXWIDTH+`DSP_A_MAXWIDTH_PARTIAL-sign_headroom-1) / (`DSP_A_MAXWIDTH_PARTIAL-sign_headroom);
+ localparam partial_Y_WIDTH = `MIN(Y_WIDTH, B_WIDTH+`DSP_A_MAXWIDTH_PARTIAL);
+ localparam last_A_WIDTH = A_WIDTH-n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom);
+ localparam last_Y_WIDTH = B_WIDTH+last_A_WIDTH;
+ if (A_SIGNED && B_SIGNED) begin
+ wire signed [partial_Y_WIDTH-1:0] partial [n-1:0];
+ wire signed [last_Y_WIDTH-1:0] last_partial;
+ wire signed [Y_WIDTH-1:0] partial_sum [n:0];
+ end
+ else begin
+ wire [partial_Y_WIDTH-1:0] partial [n-1:0];
+ wire [last_Y_WIDTH-1:0] last_partial;
+ wire [Y_WIDTH-1:0] partial_sum [n:0];
+ end
+
+ for (i = 0; i < n; i=i+1) begin:slice
+ \$__mul #(
+ .A_SIGNED(sign_headroom),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(`DSP_A_MAXWIDTH_PARTIAL),
+ .B_WIDTH(B_WIDTH),
+ .Y_WIDTH(partial_Y_WIDTH)
+ ) mul_slice (
+ .A({{sign_headroom{1'b0}}, A[i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom) +: `DSP_A_MAXWIDTH_PARTIAL-sign_headroom]}),
+ .B(B),
+ .Y(partial[i])
+ );
+ // TODO: Currently a 'cascade' approach to summing the partial
+ // products is taken here, but a more efficient 'binary
+ // reduction' approach also exists...
+ if (i == 0)
+ assign partial_sum[i] = partial[i];
+ else begin
+ // Rewrite the following statement explicitly in order
+ // to save on a call to 'opt_expr -fine' which would
+ // optimise away the '<<' op and trim size of adder
+ //assign partial_sum[i] = (partial[i] << i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)) + partial_sum[i-1];
+ if (A_SIGNED && B_SIGNED)
+ assign partial_sum[i][Y_WIDTH-1:i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)] = partial[i] + $signed(partial_sum[i-1][Y_WIDTH-1:i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)]);
+ else
+ assign partial_sum[i][Y_WIDTH-1:i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)] = partial[i] + partial_sum[i-1][Y_WIDTH-1:i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)];
+ assign partial_sum[i][i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)-1:0] = partial_sum[i-1][i*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)-1:0];
+ end
+ end
+
+ \$__mul #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(last_A_WIDTH),
+ .B_WIDTH(B_WIDTH),
+ .Y_WIDTH(last_Y_WIDTH)
+ ) mul_slice_last (
+ .A(A[A_WIDTH-1 -: last_A_WIDTH]),
+ .B(B),
+ .Y(last_partial)
+ );
+ //assign partial_sum[n] = (last_partial << n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)) + partial_sum[n-1];
+ if (A_SIGNED && B_SIGNED)
+ assign partial_sum[n][Y_WIDTH-1:n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)] = last_partial + $signed(partial_sum[n-1][Y_WIDTH-1:n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)]);
+ else
+ assign partial_sum[n][Y_WIDTH-1:n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)] = last_partial + partial_sum[n-1][Y_WIDTH-1:n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)];
+ assign partial_sum[n][n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)-1:0] = partial_sum[n-1][n*(`DSP_A_MAXWIDTH_PARTIAL-sign_headroom)-1:0];
+ assign Y = partial_sum[n];
+ end
+ else if (B_WIDTH > `DSP_B_MAXWIDTH) begin
+ localparam n = (B_WIDTH-`DSP_B_MAXWIDTH+`DSP_B_MAXWIDTH_PARTIAL-sign_headroom-1) / (`DSP_B_MAXWIDTH_PARTIAL-sign_headroom);
+ localparam partial_Y_WIDTH = `MIN(Y_WIDTH, A_WIDTH+`DSP_B_MAXWIDTH_PARTIAL);
+ localparam last_B_WIDTH = B_WIDTH-n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom);
+ localparam last_Y_WIDTH = A_WIDTH+last_B_WIDTH;
+ if (A_SIGNED && B_SIGNED) begin
+ wire signed [partial_Y_WIDTH-1:0] partial [n-1:0];
+ wire signed [last_Y_WIDTH-1:0] last_partial;
+ wire signed [Y_WIDTH-1:0] partial_sum [n:0];
+ end
+ else begin
+ wire [partial_Y_WIDTH-1:0] partial [n-1:0];
+ wire [last_Y_WIDTH-1:0] last_partial;
+ wire [Y_WIDTH-1:0] partial_sum [n:0];
+ end
+
+ for (i = 0; i < n; i=i+1) begin:slice
+ \$__mul #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(sign_headroom),
+ .A_WIDTH(A_WIDTH),
+ .B_WIDTH(`DSP_B_MAXWIDTH_PARTIAL),
+ .Y_WIDTH(partial_Y_WIDTH)
+ ) mul (
+ .A(A),
+ .B({{sign_headroom{1'b0}}, B[i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom) +: `DSP_B_MAXWIDTH_PARTIAL-sign_headroom]}),
+ .Y(partial[i])
+ );
+ // TODO: Currently a 'cascade' approach to summing the partial
+ // products is taken here, but a more efficient 'binary
+ // reduction' approach also exists...
+ if (i == 0)
+ assign partial_sum[i] = partial[i];
+ else begin
+ // Rewrite the following statement explicitly in order
+ // to save on a call to 'opt_expr -fine' which would
+ // optimise away the '<<' op and trim size of adder
+ //assign partial_sum[i] = (partial[i] << i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)) + partial_sum[i-1];
+ if (A_SIGNED && B_SIGNED)
+ assign partial_sum[i][Y_WIDTH-1:i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)] = partial[i] + $signed(partial_sum[i-1][Y_WIDTH-1:i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)]);
+ else
+ assign partial_sum[i][Y_WIDTH-1:i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)] = partial[i] + partial_sum[i-1][Y_WIDTH-1:i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)];
+ assign partial_sum[i][i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)-1:0] = partial_sum[i-1][i*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)-1:0];
+ end
+ end
+
+ \$__mul #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(A_WIDTH),
+ .B_WIDTH(last_B_WIDTH),
+ .Y_WIDTH(last_Y_WIDTH)
+ ) mul_last (
+ .A(A),
+ .B(B[B_WIDTH-1 -: last_B_WIDTH]),
+ .Y(last_partial)
+ );
+ //assign partial_sum[n] = (last_partial << n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)) + partial_sum[n-1];
+ if (A_SIGNED && B_SIGNED)
+ assign partial_sum[n][Y_WIDTH-1:n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)] = last_partial + partial_sum[n-1][Y_WIDTH-1:n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)];
+ else
+ assign partial_sum[n][Y_WIDTH-1:n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)] = last_partial + $signed(partial_sum[n-1][Y_WIDTH-1:n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)]);
+ assign partial_sum[n][n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)-1:0] = partial_sum[n-1][n*(`DSP_B_MAXWIDTH_PARTIAL-sign_headroom)-1:0];
+ assign Y = partial_sum[n];
+ end
+ else begin
+ if (A_SIGNED)
+ wire signed [`DSP_A_MAXWIDTH-1:0] Aext = $signed(A);
+ else
+ wire [`DSP_A_MAXWIDTH-1:0] Aext = A;
+ if (B_SIGNED)
+ wire signed [`DSP_B_MAXWIDTH-1:0] Bext = $signed(B);
+ else
+ wire [`DSP_B_MAXWIDTH-1:0] Bext = B;
+
+ `DSP_NAME #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(`DSP_A_MAXWIDTH),
+ .B_WIDTH(`DSP_B_MAXWIDTH),
+ .Y_WIDTH(`MIN(Y_WIDTH,`DSP_A_MAXWIDTH+`DSP_B_MAXWIDTH)),
+ ) _TECHMAP_REPLACE_ (
+ .A(Aext),
+ .B(Bext),
+ .Y(Y)
+ );
+ end
+ end
+ endgenerate
+endmodule
+
+(* techmap_celltype = "$mul $__mul" *)
+module _90_soft_mul (A, B, Y);
+ parameter A_SIGNED = 0;
+ parameter B_SIGNED = 0;
+ parameter A_WIDTH = 1;
+ parameter B_WIDTH = 1;
+ parameter Y_WIDTH = 1;
+
+ input [A_WIDTH-1:0] A;
+ input [B_WIDTH-1:0] B;
+ output [Y_WIDTH-1:0] Y;
+
+ // Indirection necessary since mapping
+ // back to $mul will cause recursion
+ generate
+ if (A_SIGNED && !B_SIGNED)
+ \$__soft_mul #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(1),
+ .A_WIDTH(A_WIDTH),
+ .B_WIDTH(B_WIDTH+1),
+ .Y_WIDTH(Y_WIDTH)
+ ) _TECHMAP_REPLACE_ (
+ .A(A),
+ .B({1'b0,B}),
+ .Y(Y)
+ );
+ else if (!A_SIGNED && B_SIGNED)
+ \$__soft_mul #(
+ .A_SIGNED(1),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(A_WIDTH+1),
+ .B_WIDTH(B_WIDTH),
+ .Y_WIDTH(Y_WIDTH)
+ ) _TECHMAP_REPLACE_ (
+ .A({1'b0,A}),
+ .B(B),
+ .Y(Y)
+ );
+ else
+ \$__soft_mul #(
+ .A_SIGNED(A_SIGNED),
+ .B_SIGNED(B_SIGNED),
+ .A_WIDTH(A_WIDTH),
+ .B_WIDTH(B_WIDTH),
+ .Y_WIDTH(Y_WIDTH)
+ ) _TECHMAP_REPLACE_ (
+ .A(A),
+ .B(B),
+ .Y(Y)
+ );
+ endgenerate
+endmodule
diff --git a/techlibs/ecp5/Makefile.inc b/techlibs/ecp5/Makefile.inc index 9efb6347f..80eee5004 100644 --- a/techlibs/ecp5/Makefile.inc +++ b/techlibs/ecp5/Makefile.inc @@ -13,6 +13,7 @@ $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/brams_map.v)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/bram.txt)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/arith_map.v)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/latches_map.v)) +$(eval $(call add_share_file,share/ecp5,techlibs/ecp5/dsp_map.v)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_map.v)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_unmap.v)) diff --git a/techlibs/ecp5/dsp_map.v b/techlibs/ecp5/dsp_map.v new file mode 100644 index 000000000..cb95ddb1c --- /dev/null +++ b/techlibs/ecp5/dsp_map.v @@ -0,0 +1,17 @@ +module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] Y); + + parameter A_WIDTH = 18; + parameter B_WIDTH = 18; + parameter Y_WIDTH = 36; + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + + MULT18X18D _TECHMAP_REPLACE_ ( + .A0(A[0]), .A1(A[1]), .A2(A[2]), .A3(A[3]), .A4(A[4]), .A5(A[5]), .A6(A[6]), .A7(A[7]), .A8(A[8]), .A9(A[9]), .A10(A[10]), .A11(A[11]), .A12(A[12]), .A13(A[13]), .A14(A[14]), .A15(A[15]), .A16(A[16]), .A17(A[17]), + .B0(B[0]), .B1(B[1]), .B2(B[2]), .B3(B[3]), .B4(B[4]), .B5(B[5]), .B6(B[6]), .B7(B[7]), .B8(B[8]), .B9(B[9]), .B10(B[10]), .B11(B[11]), .B12(B[12]), .B13(B[13]), .B14(B[14]), .B15(B[15]), .B16(B[16]), .B17(B[17]), + .C17(1'b0), .C16(1'b0), .C15(1'b0), .C14(1'b0), .C13(1'b0), .C12(1'b0), .C11(1'b0), .C10(1'b0), .C9(1'b0), .C8(1'b0), .C7(1'b0), .C6(1'b0), .C5(1'b0), .C4(1'b0), .C3(1'b0), .C2(1'b0), .C1(1'b0), .C0(1'b0), + .SIGNEDA(A_SIGNED), .SIGNEDB(B_SIGNED), .SOURCEA(1'b0), .SOURCEB(1'b0), + + .P0(Y[0]), .P1(Y[1]), .P2(Y[2]), .P3(Y[3]), .P4(Y[4]), .P5(Y[5]), .P6(Y[6]), .P7(Y[7]), .P8(Y[8]), .P9(Y[9]), .P10(Y[10]), .P11(Y[11]), .P12(Y[12]), .P13(Y[13]), .P14(Y[14]), .P15(Y[15]), .P16(Y[16]), .P17(Y[17]), .P18(Y[18]), .P19(Y[19]), .P20(Y[20]), .P21(Y[21]), .P22(Y[22]), .P23(Y[23]), .P24(Y[24]), .P25(Y[25]), .P26(Y[26]), .P27(Y[27]), .P28(Y[28]), .P29(Y[29]), .P30(Y[30]), .P31(Y[31]), .P32(Y[32]), .P33(Y[33]), .P34(Y[34]), .P35(Y[35]) + ); +endmodule diff --git a/techlibs/ecp5/synth_ecp5.cc b/techlibs/ecp5/synth_ecp5.cc index 2593546e0..0a3dcc62c 100644 --- a/techlibs/ecp5/synth_ecp5.cc +++ b/techlibs/ecp5/synth_ecp5.cc @@ -89,6 +89,9 @@ struct SynthEcp5Pass : public ScriptPass log(" generate an output netlist (and BLIF file) suitable for VPR\n"); log(" (this feature is experimental and incomplete)\n"); log("\n"); + log(" -nodsp\n"); + log(" do not map multipliers to MULT18X18D\n"); + log("\n"); log("\n"); log("The following commands are executed by this synthesis command:\n"); help_script(); @@ -96,7 +99,7 @@ struct SynthEcp5Pass : public ScriptPass } string top_opt, blif_file, edif_file, json_file; - bool noccu2, nodffe, nobram, nolutram, nowidelut, flatten, retime, abc2, abc9, vpr; + bool noccu2, nodffe, nobram, nolutram, nowidelut, flatten, retime, abc2, abc9, nodsp, vpr; void clear_flags() YS_OVERRIDE { @@ -114,6 +117,7 @@ struct SynthEcp5Pass : public ScriptPass abc2 = false; vpr = false; abc9 = false; + nodsp = false; } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE @@ -192,6 +196,10 @@ struct SynthEcp5Pass : public ScriptPass abc9 = true; continue; } + if (args[argidx] == "-nodsp") { + nodsp = true; + continue; + } break; } extra_args(args, argidx, design); @@ -228,7 +236,29 @@ struct SynthEcp5Pass : public ScriptPass if (check_label("coarse")) { - run("synth -run coarse"); + run("opt_expr"); + run("opt_clean"); + run("check"); + run("opt"); + run("wreduce"); + run("peepopt"); + run("opt_clean"); + run("share"); + run("techmap -map +/cmp2lut.v -D LUT_WIDTH=4"); + run("opt_expr"); + run("opt_clean"); + if (!nodsp) { + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=18 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=$__MUL18X18", "(unless -nodsp)"); + run("clean", "(unless -nodsp)"); + run("techmap -map +/ecp5/dsp_map.v", "(unless -nodsp)"); + run("chtype -set $mul t:$__soft_mul", "(unless -nodsp)"); + } + run("alumacc"); + run("opt"); + run("fsm"); + run("opt -fast"); + run("memory -nomap"); + run("opt_clean"); } if (!nobram && check_label("map_bram", "(skip if -nobram)")) diff --git a/techlibs/ice40/Makefile.inc b/techlibs/ice40/Makefile.inc index 76a89b107..92a9956ea 100644 --- a/techlibs/ice40/Makefile.inc +++ b/techlibs/ice40/Makefile.inc @@ -27,6 +27,7 @@ $(eval $(call add_share_file,share/ice40,techlibs/ice40/cells_sim.v)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/latches_map.v)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/brams.txt)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/brams_map.v)) +$(eval $(call add_share_file,share/ice40,techlibs/ice40/dsp_map.v)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/abc_hx.box)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/abc_hx.lut)) $(eval $(call add_share_file,share/ice40,techlibs/ice40/abc_lp.box)) diff --git a/techlibs/ice40/dsp_map.v b/techlibs/ice40/dsp_map.v new file mode 100644 index 000000000..06fa73956 --- /dev/null +++ b/techlibs/ice40/dsp_map.v @@ -0,0 +1,34 @@ +module \$__MUL16X16 (input [15:0] A, input [15:0] B, output [31:0] Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + SB_MAC16 #( + .NEG_TRIGGER(1'b0), + .C_REG(1'b0), + .A_REG(1'b0), + .B_REG(1'b0), + .D_REG(1'b0), + .TOP_8x8_MULT_REG(1'b0), + .BOT_8x8_MULT_REG(1'b0), + .PIPELINE_16x16_MULT_REG1(1'b0), + .PIPELINE_16x16_MULT_REG2(1'b0), + .TOPOUTPUT_SELECT(2'b11), + .TOPADDSUB_LOWERINPUT(2'b0), + .TOPADDSUB_UPPERINPUT(1'b0), + .TOPADDSUB_CARRYSELECT(2'b0), + .BOTOUTPUT_SELECT(2'b11), + .BOTADDSUB_LOWERINPUT(2'b0), + .BOTADDSUB_UPPERINPUT(1'b0), + .BOTADDSUB_CARRYSELECT(2'b0), + .MODE_8x8(1'b0), + .A_SIGNED(A_SIGNED), + .B_SIGNED(B_SIGNED) + ) _TECHMAP_REPLACE_ ( + .A(A), + .B(B), + .O(Y), + ); +endmodule diff --git a/techlibs/ice40/synth_ice40.cc b/techlibs/ice40/synth_ice40.cc index a3890268a..55aa72aa7 100644 --- a/techlibs/ice40/synth_ice40.cc +++ b/techlibs/ice40/synth_ice40.cc @@ -272,8 +272,13 @@ struct SynthIce40Pass : public ScriptPass run("techmap -map +/cmp2lut.v -D LUT_WIDTH=4"); run("opt_expr"); run("opt_clean"); - if (help_mode || dsp) - run("ice40_dsp", "(if -dsp)"); + if (help_mode || dsp) { + run("techmap -map +/mul2dsp.v -map +/ice40/dsp_map.v -D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=16 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_Y_MINWIDTH=11 -D DSP_NAME=$__MUL16X16", "(if -dsp)"); + run("opt_expr -fine", " (if -dsp)"); + run("wreduce", " (if -dsp)"); + run("ice40_dsp", " (if -dsp)"); + run("chtype -set $mul t:$__soft_mul","(if -dsp)"); + } run("alumacc"); run("opt"); run("fsm"); diff --git a/techlibs/xilinx/Makefile.inc b/techlibs/xilinx/Makefile.inc index b5e81a79d..5f5aa5518 100644 --- a/techlibs/xilinx/Makefile.inc +++ b/techlibs/xilinx/Makefile.inc @@ -39,6 +39,7 @@ $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/xc6s_ff_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/xc7_ff_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/lut_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/mux_map.v)) +$(eval $(call add_share_file,share/xilinx,techlibs/xilinx/dsp_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/abc_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/abc_unmap.v)) diff --git a/techlibs/xilinx/cells_sim.v b/techlibs/xilinx/cells_sim.v index 6e8729256..ed421f85e 100644 --- a/techlibs/xilinx/cells_sim.v +++ b/techlibs/xilinx/cells_sim.v @@ -489,3 +489,412 @@ module SRLC32E ( always @(posedge CLK) if (CE) r <= { r[30:0], D }; endgenerate endmodule + +module DSP48E1 ( + output [29:0] ACOUT, + output [17:0] BCOUT, + output reg CARRYCASCOUT, + output reg [3:0] CARRYOUT, + output reg MULTSIGNOUT, + output OVERFLOW, + output reg signed [47:0] P, + output PATTERNBDETECT, + output PATTERNDETECT, + output [47:0] PCOUT, + output UNDERFLOW, + input signed [29:0] A, + input [29:0] ACIN, + input [3:0] ALUMODE, + input signed [17:0] B, + input [17:0] BCIN, + input [47:0] C, + input CARRYCASCIN, + input CARRYIN, + input [2:0] CARRYINSEL, + input CEA1, + input CEA2, + input CEAD, + input CEALUMODE, + input CEB1, + input CEB2, + input CEC, + input CECARRYIN, + input CECTRL, + input CED, + input CEINMODE, + input CEM, + input CEP, + (* clkbuf_sink *) input CLK, + input [24:0] D, + input [4:0] INMODE, + input MULTSIGNIN, + input [6:0] OPMODE, + input [47:0] PCIN, + input RSTA, + input RSTALLCARRYIN, + input RSTALUMODE, + input RSTB, + input RSTC, + input RSTCTRL, + input RSTD, + input RSTINMODE, + input RSTM, + input RSTP +); + parameter integer ACASCREG = 1; + parameter integer ADREG = 1; + parameter integer ALUMODEREG = 1; + parameter integer AREG = 1; + parameter AUTORESET_PATDET = "NO_RESET"; + parameter A_INPUT = "DIRECT"; + parameter integer BCASCREG = 1; + parameter integer BREG = 1; + parameter B_INPUT = "DIRECT"; + parameter integer CARRYINREG = 1; + parameter integer CARRYINSELREG = 1; + parameter integer CREG = 1; + parameter integer DREG = 1; + parameter integer INMODEREG = 1; + parameter integer MREG = 1; + parameter integer OPMODEREG = 1; + parameter integer PREG = 1; + parameter SEL_MASK = "MASK"; + parameter SEL_PATTERN = "PATTERN"; + parameter USE_DPORT = "FALSE"; + parameter USE_MULT = "MULTIPLY"; + parameter USE_PATTERN_DETECT = "NO_PATDET"; + parameter USE_SIMD = "ONE48"; + parameter [47:0] MASK = 48'h3FFFFFFFFFFF; + parameter [47:0] PATTERN = 48'h000000000000; + parameter [3:0] IS_ALUMODE_INVERTED = 4'b0; + parameter [0:0] IS_CARRYIN_INVERTED = 1'b0; + parameter [0:0] IS_CLK_INVERTED = 1'b0; + parameter [4:0] IS_INMODE_INVERTED = 5'b0; + parameter [6:0] IS_OPMODE_INVERTED = 7'b0; + + initial begin +`ifdef __ICARUS__ + if (AUTORESET_PATDET != "NO_RESET") $fatal(1, "Unsupported AUTORESET_PATDET value"); + //if (PREG != 0) $fatal(1, "Unsupported PREG value"); + if (SEL_MASK != "MASK") $fatal(1, "Unsupported SEL_MASK value"); + if (SEL_PATTERN != "PATTERN") $fatal(1, "Unsupported SEL_PATTERN value"); + if (USE_PATTERN_DETECT != "NO_PATDET") $fatal(1, "Unsupported USE_PATTERN_DETECT value"); + if (USE_SIMD != "ONE48" && USE_SIMD != "TWO24" && USE_SIMD != "FOUR12") $fatal(1, "Unsupported USE_SIMD value"); + if (IS_ALUMODE_INVERTED != 4'b0) $fatal(1, "Unsupported IS_ALUMODE_INVERTED value"); + if (IS_CARRYIN_INVERTED != 1'b0) $fatal(1, "Unsupported IS_CARRYIN_INVERTED value"); + if (IS_CLK_INVERTED != 1'b0) $fatal(1, "Unsupported IS_CLK_INVERTED value"); + if (IS_INMODE_INVERTED != 5'b0) $fatal(1, "Unsupported IS_INMODE_INVERTED value"); + if (IS_OPMODE_INVERTED != 7'b0) $fatal(1, "Unsupported IS_OPMODE_INVERTED value"); +`endif + end + + wire signed [29:0] A_muxed; + wire signed [17:0] B_muxed; + + generate + if (A_INPUT == "CASCADE") assign A_muxed = ACIN; + else assign A_muxed = A; + + if (B_INPUT == "CASCADE") assign B_muxed = BCIN; + else assign B_muxed = B; + endgenerate + + reg signed [29:0] Ar1 = 30'b0, Ar2 = 30'b0; + reg signed [24:0] Dr = 25'b0; + reg signed [17:0] Br1 = 18'b0, Br2 = 18'b0; + reg signed [47:0] Cr = 48'b0; + reg [4:0] INMODEr = 5'b0; + reg [6:0] OPMODEr = 7'b0; + reg [3:0] ALUMODEr = 4'b0; + reg [2:0] CARRYINSELr = 3'b0; + + generate + // Configurable A register + if (AREG == 2) begin + always @(posedge CLK) + if (RSTA) begin + Ar1 <= 30'b0; + Ar2 <= 30'b0; + end else begin + if (CEA1) Ar1 <= A_muxed; + if (CEA2) Ar2 <= Ar1; + end + end else if (AREG == 1) begin + always @(posedge CLK) + if (RSTA) begin + Ar1 <= 30'b0; + Ar2 <= 30'b0; + end else begin + if (CEA1) Ar1 <= A_muxed; + if (CEA2) Ar2 <= A_muxed; + end + end else begin + always @* Ar1 <= A_muxed; + always @* Ar2 <= A_muxed; + end + + // Configurable B register + if (BREG == 2) begin + always @(posedge CLK) + if (RSTB) begin + Br1 <= 18'b0; + Br2 <= 18'b0; + end else begin + if (CEB1) Br1 <= B_muxed; + if (CEB2) Br2 <= Br1; + end + end else if (BREG == 1) begin + always @(posedge CLK) + if (RSTB) begin + Br1 <= 18'b0; + Br2 <= 18'b0; + end else begin + if (CEB1) Br1 <= B_muxed; + if (CEB2) Br2 <= B_muxed; + end + end else begin + always @* Br1 <= B_muxed; + always @* Br2 <= B_muxed; + end + + // C and D registers + if (CREG == 1) begin always @(posedge CLK) if (RSTC) Cr <= 48'b0; else if (CEC) Cr <= C; end + else always @* Cr <= C; + + if (DREG == 1) begin always @(posedge CLK) if (RSTD) Dr <= 25'b0; else if (CED) Dr <= D; end + else always @* Dr <= D; + + // Control registers + if (INMODEREG == 1) begin always @(posedge CLK) if (RSTINMODE) INMODEr <= 5'b0; else if (CEINMODE) INMODEr <= INMODE; end + else always @* INMODEr <= INMODE; + if (OPMODEREG == 1) begin always @(posedge CLK) if (RSTCTRL) OPMODEr <= 7'b0; else if (CECTRL) OPMODEr <= OPMODE; end + else always @* OPMODEr <= OPMODE; + if (ALUMODEREG == 1) begin always @(posedge CLK) if (RSTALUMODE) ALUMODEr <= 4'b0; else if (CEALUMODE) ALUMODEr <= ALUMODE; end + else always @* ALUMODEr <= ALUMODE; + if (CARRYINSELREG == 1) begin always @(posedge CLK) if (RSTCTRL) CARRYINSELr <= 3'b0; else if (CECTRL) CARRYINSELr <= CARRYINSEL; end + else always @* CARRYINSELr <= CARRYINSEL; + endgenerate + + // A and B cascsde + generate + if (ACASCREG == 1 && AREG == 2) assign ACOUT = Ar1; + else assign ACOUT = Ar2; + if (BCASCREG == 1 && BREG == 2) assign BCOUT = Br1; + else assign BCOUT = Br2; + endgenerate + + // A/D input selection and pre-adder + wire signed [29:0] Ar12_muxed = INMODEr[0] ? Ar1 : Ar2; + wire signed [24:0] Ar12_gated = INMODEr[1] ? 25'b0 : Ar12_muxed; + wire signed [24:0] Dr_gated = INMODEr[2] ? Dr : 25'b0; + wire signed [24:0] AD_result = INMODEr[3] ? (Dr_gated - Ar12_gated) : (Dr_gated + Ar12_gated); + reg signed [24:0] ADr = 25'b0; + + generate + if (ADREG == 1) begin always @(posedge CLK) if (RSTD) ADr <= 25'b0; else if (CEAD) ADr <= AD_result; end + else always @* ADr <= AD_result; + endgenerate + + // 25x18 multiplier + wire signed [24:0] A_MULT; + wire signed [17:0] B_MULT = INMODEr[4] ? Br1 : Br2; + generate + if (USE_DPORT == "TRUE") assign A_MULT = ADr; + else assign A_MULT = Ar12_gated; + endgenerate + + wire signed [42:0] M = A_MULT * B_MULT; + wire signed [42:0] Mx = (CARRYINSEL == 3'b010) ? 43'bx : M; + reg signed [42:0] Mr = 43'b0; + + // Multiplier result register + generate + if (MREG == 1) begin always @(posedge CLK) if (RSTM) Mr <= 43'b0; else if (CEM) Mr <= Mx; end + else always @* Mr <= Mx; + endgenerate + + wire signed [42:0] Mrx = (CARRYINSELr == 3'b010) ? 43'bx : Mr; + + // X, Y and Z ALU inputs + reg signed [47:0] X, Y, Z; + + always @* begin + // X multiplexer + case (OPMODEr[1:0]) + 2'b00: X = 48'b0; + 2'b01: begin X = $signed(Mrx); +`ifdef __ICARUS__ + if (OPMODEr[3:2] != 2'b01) $fatal(1, "OPMODEr[3:2] must be 2'b01 when OPMODEr[1:0] is 2'b01"); +`endif + end + 2'b10: begin X = P; +`ifdef __ICARUS__ + if (PREG != 1) $fatal(1, "PREG must be 1 when OPMODEr[1:0] is 2'b10"); +`endif + end + 2'b11: X = $signed({Ar2, Br2}); + default: X = 48'bx; + endcase + + // Y multiplexer + case (OPMODEr[3:2]) + 2'b00: Y = 48'b0; + 2'b01: begin Y = 48'b0; // FIXME: more accurate partial product modelling? +`ifdef __ICARUS__ + if (OPMODEr[1:0] != 2'b01) $fatal(1, "OPMODEr[1:0] must be 2'b01 when OPMODEr[3:2] is 2'b01"); +`endif + end + 2'b10: Y = {48{1'b1}}; + 2'b11: Y = Cr; + default: Y = 48'bx; + endcase + + // Z multiplexer + case (OPMODEr[6:4]) + 3'b000: Z = 48'b0; + 3'b001: Z = PCIN; + 3'b010: begin Z = P; +`ifdef __ICARUS__ + if (PREG != 1) $fatal(1, "PREG must be 1 when OPMODEr[6:4] i0s 3'b010"); +`endif + end + 3'b011: Z = Cr; + 3'b100: begin Z = P; +`ifdef __ICARUS__ + if (PREG != 1) $fatal(1, "PREG must be 1 when OPMODEr[6:4] is 3'b100"); + if (OPMODEr[3:0] != 4'b1000) $fatal(1, "OPMODEr[3:0] must be 4'b1000 when OPMODEr[6:4] i0s 3'b100"); +`endif + end + 3'b101: Z = $signed(PCIN[47:17]); + 3'b110: Z = $signed(P[47:17]); + default: Z = 48'bx; + endcase + end + + // Carry in + wire A24_xnor_B17d = A_MULT[24] ~^ B_MULT[17]; + reg CARRYINr = 1'b0, A24_xnor_B17 = 1'b0; + generate + if (CARRYINREG == 1) begin always @(posedge CLK) if (RSTALLCARRYIN) CARRYINr <= 1'b0; else if (CECARRYIN) CARRYINr <= CARRYIN; end + else always @* CARRYINr = CARRYIN; + + if (MREG == 1) begin always @(posedge CLK) if (RSTALLCARRYIN) A24_xnor_B17 <= 1'b0; else if (CEM) A24_xnor_B17 <= A24_xnor_B17d; end + else always @* A24_xnor_B17 = A24_xnor_B17d; + endgenerate + + reg cin_muxed; + + always @(*) begin + case (CARRYINSELr) + 3'b000: cin_muxed = CARRYINr; + 3'b001: cin_muxed = ~PCIN[47]; + 3'b010: cin_muxed = CARRYCASCIN; + 3'b011: cin_muxed = PCIN[47]; + 3'b100: cin_muxed = CARRYCASCOUT; + 3'b101: cin_muxed = ~P[47]; + 3'b110: cin_muxed = A24_xnor_B17; + 3'b111: cin_muxed = P[47]; + default: cin_muxed = 1'bx; + endcase + end + + wire alu_cin = (ALUMODEr[3] || ALUMODEr[2]) ? 1'b0 : cin_muxed; + + // ALU core + wire [47:0] Z_muxinv = ALUMODEr[0] ? ~Z : Z; + wire [47:0] xor_xyz = X ^ Y ^ Z_muxinv; + wire [47:0] maj_xyz = (X & Y) | (X & Z_muxinv) | (Y & Z_muxinv); + + wire [47:0] xor_xyz_muxed = ALUMODEr[3] ? maj_xyz : xor_xyz; + wire [47:0] maj_xyz_gated = ALUMODEr[2] ? 48'b0 : maj_xyz; + + wire [48:0] maj_xyz_simd_gated; + wire [3:0] int_carry_in, int_carry_out, ext_carry_out; + wire [47:0] alu_sum; + assign int_carry_in[0] = 1'b0; + wire [3:0] carryout_reset; + + generate + if (USE_SIMD == "FOUR12") begin + assign maj_xyz_simd_gated = { + maj_xyz_gated[47:36], + 1'b0, maj_xyz_gated[34:24], + 1'b0, maj_xyz_gated[22:12], + 1'b0, maj_xyz_gated[10:0], + alu_cin + }; + assign int_carry_in[3:1] = 3'b000; + assign ext_carry_out = { + int_carry_out[3], + maj_xyz_gated[35] ^ int_carry_out[2], + maj_xyz_gated[23] ^ int_carry_out[1], + maj_xyz_gated[11] ^ int_carry_out[0] + }; + assign carryout_reset = 4'b0000; + end else if (USE_SIMD == "TWO24") begin + assign maj_xyz_simd_gated = { + maj_xyz_gated[47:24], + 1'b0, maj_xyz_gated[22:0], + alu_cin + }; + assign int_carry_in[3:1] = {int_carry_out[2], 1'b0, int_carry_out[0]}; + assign ext_carry_out = { + int_carry_out[3], + 1'bx, + maj_xyz_gated[23] ^ int_carry_out[1], + 1'bx + }; + assign carryout_reset = 4'b0x0x; + end else begin + assign maj_xyz_simd_gated = {maj_xyz_gated, alu_cin}; + assign int_carry_in[3:1] = int_carry_out[2:0]; + assign ext_carry_out = { + int_carry_out[3], + 3'bxxx + }; + assign carryout_reset = 4'b0xxx; + end + + genvar i; + for (i = 0; i < 4; i = i + 1) + assign {int_carry_out[i], alu_sum[i*12 +: 12]} = {1'b0, maj_xyz_simd_gated[i*12 +: ((i == 3) ? 13 : 12)]} + + xor_xyz_muxed[i*12 +: 12] + int_carry_in[i]; + endgenerate + + wire signed [47:0] Pd = ALUMODEr[1] ? ~alu_sum : alu_sum; + initial P = 48'b0; + initial CARRYOUT = carryout_reset; + initial CARRYCASCOUT = 1'b0; + initial MULTSIGNOUT = 1'b0; + wire [3:0] CARRYOUTd = (OPMODEr[3:0] == 4'b0101 || ALUMODEr[3:2] != 2'b00) ? 4'bxxxx : + ((ALUMODEr[0] & ALUMODEr[1]) ? ~ext_carry_out : ext_carry_out); + wire CARRYCASCOUTd = ext_carry_out[3]; + wire MULTSIGNOUTd = Mrx[42]; + + generate + if (PREG == 1) begin + always @(posedge CLK) + if (RSTP) begin + P <= 48'b0; + CARRYOUT <= carryout_reset; + CARRYCASCOUT <= 1'b0; + MULTSIGNOUT <= 1'b0; + end else if (CEP) begin + P <= Pd; + CARRYOUT <= CARRYOUTd; + CARRYCASCOUT <= CARRYCASCOUTd; + MULTSIGNOUT <= MULTSIGNOUTd; + end + end else begin + always @* begin + P = Pd; + CARRYOUT = CARRYOUTd; + CARRYCASCOUT = CARRYCASCOUTd; + MULTSIGNOUT = MULTSIGNOUTd; + end + end + endgenerate + + assign PCOUT = P; + +endmodule diff --git a/techlibs/xilinx/cells_xtra.v b/techlibs/xilinx/cells_xtra.v index a6669b872..b8abdda64 100644 --- a/techlibs/xilinx/cells_xtra.v +++ b/techlibs/xilinx/cells_xtra.v @@ -137,89 +137,6 @@ module DNA_PORT (...); input SHIFT; endmodule -module DSP48E1 (...); - parameter integer ACASCREG = 1; - parameter integer ADREG = 1; - parameter integer ALUMODEREG = 1; - parameter integer AREG = 1; - parameter AUTORESET_PATDET = "NO_RESET"; - parameter A_INPUT = "DIRECT"; - parameter integer BCASCREG = 1; - parameter integer BREG = 1; - parameter B_INPUT = "DIRECT"; - parameter integer CARRYINREG = 1; - parameter integer CARRYINSELREG = 1; - parameter integer CREG = 1; - parameter integer DREG = 1; - parameter integer INMODEREG = 1; - parameter integer MREG = 1; - parameter integer OPMODEREG = 1; - parameter integer PREG = 1; - parameter SEL_MASK = "MASK"; - parameter SEL_PATTERN = "PATTERN"; - parameter USE_DPORT = "FALSE"; - parameter USE_MULT = "MULTIPLY"; - parameter USE_PATTERN_DETECT = "NO_PATDET"; - parameter USE_SIMD = "ONE48"; - parameter [47:0] MASK = 48'h3FFFFFFFFFFF; - parameter [47:0] PATTERN = 48'h000000000000; - parameter [3:0] IS_ALUMODE_INVERTED = 4'b0; - parameter [0:0] IS_CARRYIN_INVERTED = 1'b0; - parameter [0:0] IS_CLK_INVERTED = 1'b0; - parameter [4:0] IS_INMODE_INVERTED = 5'b0; - parameter [6:0] IS_OPMODE_INVERTED = 7'b0; - output [29:0] ACOUT; - output [17:0] BCOUT; - output CARRYCASCOUT; - output [3:0] CARRYOUT; - output MULTSIGNOUT; - output OVERFLOW; - output [47:0] P; - output PATTERNBDETECT; - output PATTERNDETECT; - output [47:0] PCOUT; - output UNDERFLOW; - input [29:0] A; - input [29:0] ACIN; - input [3:0] ALUMODE; - input [17:0] B; - input [17:0] BCIN; - input [47:0] C; - input CARRYCASCIN; - input CARRYIN; - input [2:0] CARRYINSEL; - input CEA1; - input CEA2; - input CEAD; - input CEALUMODE; - input CEB1; - input CEB2; - input CEC; - input CECARRYIN; - input CECTRL; - input CED; - input CEINMODE; - input CEM; - input CEP; - (* clkbuf_sink *) - input CLK; - input [24:0] D; - input [4:0] INMODE; - input MULTSIGNIN; - input [6:0] OPMODE; - input [47:0] PCIN; - input RSTA; - input RSTALLCARRYIN; - input RSTALUMODE; - input RSTB; - input RSTC; - input RSTCTRL; - input RSTD; - input RSTINMODE; - input RSTM; - input RSTP; -endmodule - module EFUSE_USR (...); parameter [31:0] SIM_EFUSE_VALUE = 32'h00000000; output [31:0] EFUSEUSR; diff --git a/techlibs/xilinx/dsp_map.v b/techlibs/xilinx/dsp_map.v new file mode 100644 index 000000000..cc37f0085 --- /dev/null +++ b/techlibs/xilinx/dsp_map.v @@ -0,0 +1,48 @@ +module \$__MUL25X18 (input [24:0] A, input [17:0] B, output [42:0] Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + wire [47:0] P_48; + DSP48E1 #( + // Disable all registers + .ACASCREG(0), + .ADREG(0), + .A_INPUT("DIRECT"), + .ALUMODEREG(0), + .AREG(0), + .BCASCREG(0), + .B_INPUT("DIRECT"), + .BREG(0), + .CARRYINREG(0), + .CARRYINSELREG(0), + .CREG(0), + .DREG(0), + .INMODEREG(0), + .MREG(0), + .OPMODEREG(0), + .PREG(0), + .USE_MULT("MULTIPLY"), + .USE_SIMD("ONE48") + ) _TECHMAP_REPLACE_ ( + //Data path + .A({{5{A[24]}}, A}), + .B(B), + .C(48'b0), + .D(24'b0), + .P(P_48), + + .INMODE(5'b00000), + .ALUMODE(4'b0000), + .OPMODE(7'b000101), + .CARRYINSEL(3'b000), + + .ACIN(30'b0), + .BCIN(18'b0), + .PCIN(48'b0), + .CARRYIN(1'b0) + ); + assign Y = P_48; +endmodule diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc index e0e81ef1d..3d92c3e2c 100644 --- a/techlibs/xilinx/synth_xilinx.cc +++ b/techlibs/xilinx/synth_xilinx.cc @@ -81,6 +81,8 @@ struct SynthXilinxPass : public ScriptPass log(" -nowidelut\n"); log(" do not use MUXF[78] resources to implement LUTs larger than LUT6s\n"); log("\n"); + log(" -nodsp\n"); + log(" do not use DSP48E1s to implement multipliers and associated logic\n"); log(" -iopad\n"); log(" enable I/O buffer insertion (selected automatically by -ise)\n"); log("\n"); @@ -116,7 +118,7 @@ struct SynthXilinxPass : public ScriptPass } std::string top_opt, edif_file, blif_file, family; - bool flatten, retime, vpr, ise, iopad, noiopad, noclkbuf, nobram, nolutram, nosrl, nocarry, nowidelut, abc9; + bool flatten, retime, vpr, ise, iopad, noiopad, noclkbuf, nobram, nolutram, nosrl, nocarry, nowidelut, nodsp, abc9; bool flatten_before_abc; int widemux; @@ -139,6 +141,7 @@ struct SynthXilinxPass : public ScriptPass nosrl = false; nocarry = false; nowidelut = false; + nodsp = false; abc9 = false; flatten_before_abc = false; widemux = 0; @@ -240,6 +243,10 @@ struct SynthXilinxPass : public ScriptPass abc9 = true; continue; } + if (args[argidx] == "-nodsp") { + nodsp = true; + continue; + } break; } extra_args(args, argidx, design); @@ -293,10 +300,10 @@ struct SynthXilinxPass : public ScriptPass run(stringf("hierarchy -check %s", top_opt.c_str())); } - if (check_label("coarse")) { + if (check_label("prepare")) { run("proc"); - if (help_mode || flatten) - run("flatten", "(if -flatten)"); + if (flatten || help_mode) + run("flatten", "(with '-flatten')"); run("opt_expr"); run("opt_clean"); run("check"); @@ -320,6 +327,18 @@ struct SynthXilinxPass : public ScriptPass } run("techmap -map +/cmp2lut.v -D LUT_WIDTH=6"); + } + + if (check_label("map_dsp"), "(skip if '-nodsp')") { + if (!nodsp || help_mode) { + // NB: Xilinx multipliers are signed only + run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 -D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18"); + run("xilinx_dsp"); + run("chtype -set $mul t:$__soft_mul"); + } + } + + if (check_label("coarse")) { run("alumacc"); run("share"); run("opt"); diff --git a/techlibs/xilinx/tests/.gitignore b/techlibs/xilinx/tests/.gitignore index 496b87461..ef3699bd2 100644 --- a/techlibs/xilinx/tests/.gitignore +++ b/techlibs/xilinx/tests/.gitignore @@ -4,3 +4,8 @@ bram1_[0-9]*/ bram2.log bram2_syn.v bram2_tb +dsp_work*/ +test_dsp_model_ref.v +test_dsp_model_uut.v +test_dsp_model +*.vcd diff --git a/techlibs/xilinx/tests/test_dsp_model.sh b/techlibs/xilinx/tests/test_dsp_model.sh new file mode 100644 index 000000000..2acd97eb4 --- /dev/null +++ b/techlibs/xilinx/tests/test_dsp_model.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -ex +sed 's/DSP48E1/DSP48E1_UUT/; /DSP48E1_UUT/,/endmodule/ p; d;' < ../cells_sim.v > test_dsp_model_uut.v +if [ ! -f "test_dsp_model_ref.v" ]; then + cat /opt/Xilinx/Vivado/2019.1/data/verilog/src/unisims/DSP48E1.v > test_dsp_model_ref.v +fi +for tb in simd24_preadd_noreg_nocasc simd12_preadd_noreg_nocasc \ + mult_allreg_nopreadd_nocasc mult_noreg_nopreadd_nocasc \ + mult_allreg_preadd_nocasc mult_noreg_preadd_nocasc mult_inreg_preadd_nocasc \ + +do + iverilog -s $tb -s glbl -o test_dsp_model test_dsp_model.v test_dsp_model_uut.v test_dsp_model_ref.v /opt/Xilinx/Vivado/2019.1/data/verilog/src/glbl.v + vvp -N ./test_dsp_model +done diff --git a/techlibs/xilinx/tests/test_dsp_model.v b/techlibs/xilinx/tests/test_dsp_model.v new file mode 100644 index 000000000..04d5b26ab --- /dev/null +++ b/techlibs/xilinx/tests/test_dsp_model.v @@ -0,0 +1,597 @@ +`timescale 1ns / 1ps + +module testbench; + parameter integer ACASCREG = 1; + parameter integer ADREG = 1; + parameter integer ALUMODEREG = 1; + parameter integer AREG = 1; + parameter AUTORESET_PATDET = "NO_RESET"; + parameter A_INPUT = "DIRECT"; + parameter integer BCASCREG = 1; + parameter integer BREG = 1; + parameter B_INPUT = "DIRECT"; + parameter integer CARRYINREG = 1; + parameter integer CARRYINSELREG = 1; + parameter integer CREG = 1; + parameter integer DREG = 1; + parameter integer INMODEREG = 1; + parameter integer MREG = 1; + parameter integer OPMODEREG = 1; + parameter integer PREG = 1; + parameter SEL_MASK = "MASK"; + parameter SEL_PATTERN = "PATTERN"; + parameter USE_DPORT = "FALSE"; + parameter USE_MULT = "MULTIPLY"; + parameter USE_PATTERN_DETECT = "NO_PATDET"; + parameter USE_SIMD = "ONE48"; + parameter [47:0] MASK = 48'h3FFFFFFFFFFF; + parameter [47:0] PATTERN = 48'h000000000000; + parameter [3:0] IS_ALUMODE_INVERTED = 4'b0; + parameter [0:0] IS_CARRYIN_INVERTED = 1'b0; + parameter [0:0] IS_CLK_INVERTED = 1'b0; + parameter [4:0] IS_INMODE_INVERTED = 5'b0; + parameter [6:0] IS_OPMODE_INVERTED = 7'b0; + + reg CLK; + reg CEA1, CEA2, CEAD, CEALUMODE, CEB1, CEB2, CEC, CECARRYIN, CECTRL; + reg CED, CEINMODE, CEM, CEP; + reg RSTA, RSTALLCARRYIN, RSTALUMODE, RSTB, RSTC, RSTCTRL, RSTD, RSTINMODE, RSTM, RSTP; + reg [29:0] A, ACIN; + reg [17:0] B, BCIN; + reg [47:0] C; + reg [24:0] D; + reg [47:0] PCIN; + reg [3:0] ALUMODE; + reg [2:0] CARRYINSEL; + reg [4:0] INMODE; + reg [6:0] OPMODE; + reg CARRYCASCIN, CARRYIN, MULTSIGNIN; + + output [29:0] ACOUT, REF_ACOUT; + output [17:0] BCOUT, REF_BCOUT; + output CARRYCASCOUT, REF_CARRYCASCOUT; + output [3:0] CARRYOUT, REF_CARRYOUT; + output MULTSIGNOUT, REF_MULTSIGNOUT; + output OVERFLOW, REF_OVERFLOW; + output [47:0] P, REF_P; + output PATTERNBDETECT, REF_PATTERNBDETECT; + output PATTERNDETECT, REF_PATTERNDETECT; + output [47:0] PCOUT, REF_PCOUT; + output UNDERFLOW, REF_UNDERFLOW; + + integer errcount = 0; + + reg ERROR_FLAG = 0; + + task clkcycle; + begin + #5; + CLK = ~CLK; + #10; + CLK = ~CLK; + #2; + ERROR_FLAG = 0; + if (REF_P !== P) begin + $display("ERROR at %1t: REF_P=%b UUT_P=%b DIFF=%b", $time, REF_P, P, REF_P ^ P); + errcount = errcount + 1; + ERROR_FLAG = 1; + end + if (REF_CARRYOUT !== CARRYOUT) begin + $display("ERROR at %1t: REF_CARRYOUT=%b UUT_CARRYOUT=%b", $time, REF_CARRYOUT, CARRYOUT); + errcount = errcount + 1; + ERROR_FLAG = 1; + end + #3; + end + endtask + + reg config_valid = 0; + task drc; + begin + config_valid = 1; + if (AREG != 2 && INMODE[0]) config_valid = 0; + if (BREG != 2 && INMODE[4]) config_valid = 0; + + if (USE_SIMD != "ONE48" && OPMODE[3:0] == 4'b0101) config_valid = 0; + + if (OPMODE[1:0] == 2'b10 && PREG != 1) config_valid = 0; + if ((OPMODE[3:2] == 2'b01) ^ (OPMODE[1:0] == 2'b01) == 1'b1) config_valid = 0; + if ((OPMODE[6:4] == 3'b010 || OPMODE[6:4] == 3'b110) && PREG != 1) config_valid = 0; + if ((OPMODE[6:4] == 3'b100) && (PREG != 1 || OPMODE[3:0] != 4'b1000 || ALUMODE[3:2] == 2'b01 || ALUMODE[3:2] == 2'b11)) config_valid = 0; + if ((CARRYINSEL == 3'b100 || CARRYINSEL == 3'b101 || CARRYINSEL == 3'b111) && (PREG != 1)) config_valid = 0; + if (OPMODE[6:4] == 3'b111) config_valid = 0; + if ((OPMODE[3:0] == 4'b0101) && CARRYINSEL == 3'b010) config_valid = 0; + if (CARRYINSEL == 3'b000 && OPMODE == 7'b1001000) config_valid = 0; + + if ((ALUMODE[3:2] == 2'b01 || ALUMODE[3:2] == 2'b11) && OPMODE[3:2] != 2'b00 && OPMODE[3:2] != 2'b10) config_valid = 0; + + + end + endtask + + initial begin + $dumpfile("test_dsp_model.vcd"); + $dumpvars(0, testbench); + + #2; + CLK = 1'b0; + {CEA1, CEA2, CEAD, CEALUMODE, CEB1, CEB2, CEC, CECARRYIN, CECTRL} = 9'b111111111; + {CED, CEINMODE, CEM, CEP} = 4'b1111; + + {A, B, C, D} = 0; + {ACIN, BCIN, PCIN} = 0; + {ALUMODE, CARRYINSEL, INMODE} = 0; + {OPMODE, CARRYCASCIN, CARRYIN, MULTSIGNIN} = 0; + + {RSTA, RSTALLCARRYIN, RSTALUMODE, RSTB, RSTC, RSTCTRL, RSTD, RSTINMODE, RSTM, RSTP} = ~0; + repeat (10) begin + #10; + CLK = 1'b1; + #10; + CLK = 1'b0; + #10; + CLK = 1'b1; + #10; + CLK = 1'b0; + end + {RSTA, RSTALLCARRYIN, RSTALUMODE, RSTB, RSTC, RSTCTRL, RSTD, RSTINMODE, RSTM, RSTP} = 0; + + repeat (10000) begin + clkcycle; + config_valid = 0; + while (!config_valid) begin + A = $urandom; + ACIN = $urandom; + B = $urandom; + BCIN = $urandom; + C = {$urandom, $urandom}; + D = $urandom; + PCIN = {$urandom, $urandom}; + + {CEA1, CEA2, CEAD, CEALUMODE, CEB1, CEB2, CEC, CECARRYIN, CECTRL} = $urandom | $urandom | $urandom; + {CED, CEINMODE, CEM, CEP} = $urandom | $urandom | $urandom | $urandom; + + // Otherwise we can accidentally create illegal configs + CEINMODE = CECTRL; + CEALUMODE = CECTRL; + + {RSTA, RSTALLCARRYIN, RSTALUMODE, RSTB, RSTC, RSTCTRL, RSTD, RSTINMODE, RSTM, RSTP} = $urandom & $urandom & $urandom & $urandom & $urandom & $urandom; + {ALUMODE, INMODE} = $urandom; + CARRYINSEL = $urandom & $urandom & $urandom; + OPMODE = $urandom; + if ($urandom & 1'b1) + OPMODE[3:0] = 4'b0101; // test multiply more than other modes + {CARRYCASCIN, CARRYIN, MULTSIGNIN} = $urandom; + + // So few valid options in these modes, just force one valid option + if (CARRYINSEL == 3'b001) OPMODE = 7'b1010101; + if (CARRYINSEL == 3'b010) OPMODE = 7'b0001010; + if (CARRYINSEL == 3'b011) OPMODE = 7'b0011011; + if (CARRYINSEL == 3'b100) OPMODE = 7'b0110011; + if (CARRYINSEL == 3'b101) OPMODE = 7'b0011010; + if (CARRYINSEL == 3'b110) OPMODE = 7'b0010101; + if (CARRYINSEL == 3'b111) OPMODE = 7'b0100011; + + drc; + end + end + + if (errcount == 0) begin + $display("All tests passed."); + $finish; + end else begin + $display("Caught %1d errors.", errcount); + $stop; + end + end + + DSP48E1 #( + .ACASCREG (ACASCREG), + .ADREG (ADREG), + .ALUMODEREG (ALUMODEREG), + .AREG (AREG), + .AUTORESET_PATDET (AUTORESET_PATDET), + .A_INPUT (A_INPUT), + .BCASCREG (BCASCREG), + .BREG (BREG), + .B_INPUT (B_INPUT), + .CARRYINREG (CARRYINREG), + .CARRYINSELREG (CARRYINSELREG), + .CREG (CREG), + .DREG (DREG), + .INMODEREG (INMODEREG), + .MREG (MREG), + .OPMODEREG (OPMODEREG), + .PREG (PREG), + .SEL_MASK (SEL_MASK), + .SEL_PATTERN (SEL_PATTERN), + .USE_DPORT (USE_DPORT), + .USE_MULT (USE_MULT), + .USE_PATTERN_DETECT (USE_PATTERN_DETECT), + .USE_SIMD (USE_SIMD), + .MASK (MASK), + .PATTERN (PATTERN), + .IS_ALUMODE_INVERTED(IS_ALUMODE_INVERTED), + .IS_CARRYIN_INVERTED(IS_CARRYIN_INVERTED), + .IS_CLK_INVERTED (IS_CLK_INVERTED), + .IS_INMODE_INVERTED (IS_INMODE_INVERTED), + .IS_OPMODE_INVERTED (IS_OPMODE_INVERTED) + ) ref ( + .ACOUT (REF_ACOUT), + .BCOUT (REF_BCOUT), + .CARRYCASCOUT (REF_CARRYCASCOUT), + .CARRYOUT (REF_CARRYOUT), + .MULTSIGNOUT (REF_MULTSIGNOUT), + .OVERFLOW (REF_OVERFLOW), + .P (REF_P), + .PATTERNBDETECT(REF_PATTERNBDETECT), + .PATTERNDETECT (REF_PATTERNDETECT), + .PCOUT (REF_PCOUT), + .UNDERFLOW (REF_UNDERFLOW), + .A (A), + .ACIN (ACIN), + .ALUMODE (ALUMODE), + .B (B), + .BCIN (BCIN), + .C (C), + .CARRYCASCIN (CARRYCASCIN), + .CARRYINSEL (CARRYINSEL), + .CEA1 (CEA1), + .CEA2 (CEA2), + .CEAD (CEAD), + .CEALUMODE (CEALUMODE), + .CEB1 (CEB1), + .CEB2 (CEB2), + .CEC (CEC), + .CECARRYIN (CECARRYIN), + .CECTRL (CECTRL), + .CED (CED), + .CEINMODE (CEINMODE), + .CEM (CEM), + .CEP (CEP), + .CLK (CLK), + .D (D), + .INMODE (INMODE), + .MULTSIGNIN (MULTSIGNIN), + .OPMODE (OPMODE), + .PCIN (PCIN), + .RSTA (RSTA), + .RSTALLCARRYIN (RSTALLCARRYIN), + .RSTALUMODE (RSTALUMODE), + .RSTB (RSTB), + .RSTC (RSTC), + .RSTCTRL (RSTCTRL), + .RSTD (RSTD), + .RSTINMODE (RSTINMODE), + .RSTM (RSTM), + .RSTP (RSTP) + ); + + DSP48E1_UUT #( + .ACASCREG (ACASCREG), + .ADREG (ADREG), + .ALUMODEREG (ALUMODEREG), + .AREG (AREG), + .AUTORESET_PATDET (AUTORESET_PATDET), + .A_INPUT (A_INPUT), + .BCASCREG (BCASCREG), + .BREG (BREG), + .B_INPUT (B_INPUT), + .CARRYINREG (CARRYINREG), + .CARRYINSELREG (CARRYINSELREG), + .CREG (CREG), + .DREG (DREG), + .INMODEREG (INMODEREG), + .MREG (MREG), + .OPMODEREG (OPMODEREG), + .PREG (PREG), + .SEL_MASK (SEL_MASK), + .SEL_PATTERN (SEL_PATTERN), + .USE_DPORT (USE_DPORT), + .USE_MULT (USE_MULT), + .USE_PATTERN_DETECT (USE_PATTERN_DETECT), + .USE_SIMD (USE_SIMD), + .MASK (MASK), + .PATTERN (PATTERN), + .IS_ALUMODE_INVERTED(IS_ALUMODE_INVERTED), + .IS_CARRYIN_INVERTED(IS_CARRYIN_INVERTED), + .IS_CLK_INVERTED (IS_CLK_INVERTED), + .IS_INMODE_INVERTED (IS_INMODE_INVERTED), + .IS_OPMODE_INVERTED (IS_OPMODE_INVERTED) + ) uut ( + .ACOUT (ACOUT), + .BCOUT (BCOUT), + .CARRYCASCOUT (CARRYCASCOUT), + .CARRYOUT (CARRYOUT), + .MULTSIGNOUT (MULTSIGNOUT), + .OVERFLOW (OVERFLOW), + .P (P), + .PATTERNBDETECT(PATTERNBDETECT), + .PATTERNDETECT (PATTERNDETECT), + .PCOUT (PCOUT), + .UNDERFLOW (UNDERFLOW), + .A (A), + .ACIN (ACIN), + .ALUMODE (ALUMODE), + .B (B), + .BCIN (BCIN), + .C (C), + .CARRYCASCIN (CARRYCASCIN), + .CARRYINSEL (CARRYINSEL), + .CEA1 (CEA1), + .CEA2 (CEA2), + .CEAD (CEAD), + .CEALUMODE (CEALUMODE), + .CEB1 (CEB1), + .CEB2 (CEB2), + .CEC (CEC), + .CECARRYIN (CECARRYIN), + .CECTRL (CECTRL), + .CED (CED), + .CEINMODE (CEINMODE), + .CEM (CEM), + .CEP (CEP), + .CLK (CLK), + .D (D), + .INMODE (INMODE), + .MULTSIGNIN (MULTSIGNIN), + .OPMODE (OPMODE), + .PCIN (PCIN), + .RSTA (RSTA), + .RSTALLCARRYIN (RSTALLCARRYIN), + .RSTALUMODE (RSTALUMODE), + .RSTB (RSTB), + .RSTC (RSTC), + .RSTCTRL (RSTCTRL), + .RSTD (RSTD), + .RSTINMODE (RSTINMODE), + .RSTM (RSTM), + .RSTP (RSTP) + ); +endmodule + +module mult_noreg_nopreadd_nocasc; + testbench #( + .ACASCREG (0), + .ADREG (0), + .ALUMODEREG (0), + .AREG (0), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (0), + .BREG (0), + .B_INPUT ("DIRECT"), + .CARRYINREG (0), + .CARRYINSELREG (0), + .CREG (0), + .DREG (0), + .INMODEREG (0), + .MREG (0), + .OPMODEREG (0), + .PREG (0), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("ONE48"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + +module mult_allreg_nopreadd_nocasc; + testbench #( + .ACASCREG (1), + .ADREG (1), + .ALUMODEREG (1), + .AREG (2), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (1), + .BREG (2), + .B_INPUT ("DIRECT"), + .CARRYINREG (1), + .CARRYINSELREG (1), + .CREG (1), + .DREG (1), + .INMODEREG (1), + .MREG (1), + .OPMODEREG (1), + .PREG (1), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("ONE48"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + +module mult_noreg_preadd_nocasc; + testbench #( + .ACASCREG (0), + .ADREG (0), + .ALUMODEREG (0), + .AREG (0), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (0), + .BREG (0), + .B_INPUT ("DIRECT"), + .CARRYINREG (0), + .CARRYINSELREG (0), + .CREG (0), + .DREG (0), + .INMODEREG (0), + .MREG (0), + .OPMODEREG (0), + .PREG (0), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("TRUE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("ONE48"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + +module mult_allreg_preadd_nocasc; + testbench #( + .ACASCREG (1), + .ADREG (1), + .ALUMODEREG (1), + .AREG (2), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (1), + .BREG (2), + .B_INPUT ("DIRECT"), + .CARRYINREG (1), + .CARRYINSELREG (1), + .CREG (1), + .DREG (1), + .INMODEREG (1), + .MREG (1), + .OPMODEREG (1), + .PREG (1), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("TRUE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("ONE48"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + +module mult_inreg_preadd_nocasc; + testbench #( + .ACASCREG (1), + .ADREG (0), + .ALUMODEREG (0), + .AREG (1), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (1), + .BREG (1), + .B_INPUT ("DIRECT"), + .CARRYINREG (0), + .CARRYINSELREG (0), + .CREG (1), + .DREG (1), + .INMODEREG (0), + .MREG (0), + .OPMODEREG (0), + .PREG (0), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("TRUE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("ONE48"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + +module simd12_preadd_noreg_nocasc; + testbench #( + .ACASCREG (0), + .ADREG (0), + .ALUMODEREG (0), + .AREG (0), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (0), + .BREG (0), + .B_INPUT ("DIRECT"), + .CARRYINREG (0), + .CARRYINSELREG (0), + .CREG (0), + .DREG (0), + .INMODEREG (0), + .MREG (0), + .OPMODEREG (0), + .PREG (0), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("TRUE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("FOUR12"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule + + +module simd24_preadd_noreg_nocasc; + testbench #( + .ACASCREG (0), + .ADREG (0), + .ALUMODEREG (0), + .AREG (0), + .AUTORESET_PATDET ("NO_RESET"), + .A_INPUT ("DIRECT"), + .BCASCREG (0), + .BREG (0), + .B_INPUT ("DIRECT"), + .CARRYINREG (0), + .CARRYINSELREG (0), + .CREG (0), + .DREG (0), + .INMODEREG (0), + .MREG (0), + .OPMODEREG (0), + .PREG (0), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + .USE_DPORT ("TRUE"), + .USE_MULT ("DYNAMIC"), + .USE_PATTERN_DETECT ("NO_PATDET"), + .USE_SIMD ("TWO24"), + .MASK (48'h3FFFFFFFFFFF), + .PATTERN (48'h000000000000), + .IS_ALUMODE_INVERTED(4'b0), + .IS_CARRYIN_INVERTED(1'b0), + .IS_CLK_INVERTED (1'b0), + .IS_INMODE_INVERTED (5'b0), + .IS_OPMODE_INVERTED (7'b0) + ) testbench (); +endmodule
\ No newline at end of file diff --git a/tests/simple/peepopt.v b/tests/simple/peepopt.v deleted file mode 100644 index 1bf427897..000000000 --- a/tests/simple/peepopt.v +++ /dev/null @@ -1,13 +0,0 @@ -module peepopt_shiftmul_0 #(parameter N=3, parameter W=3) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output [W-1:0] o); -assign o = i[s*W+:W]; -endmodule - -module peepopt_shiftmul_1 (output y, input [2:0] w); -assign y = 1'b1 >> (w * (3'b110)); -endmodule - -module peepopt_muldiv_0(input [1:0] i, output [1:0] o); -wire [3:0] t; -assign t = i * 3; -assign o = t / 3; -endmodule diff --git a/tests/various/peepopt.ys b/tests/various/peepopt.ys new file mode 100644 index 000000000..91db22423 --- /dev/null +++ b/tests/various/peepopt.ys @@ -0,0 +1,63 @@ +read_verilog <<EOT +module peepopt_shiftmul_0 #(parameter N=3, parameter W=3) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output [W-1:0] o); +assign o = i[s*W+:W]; +endmodule +EOT + +prep -nokeepdc +equiv_opt peepopt +design -load postopt +clean +select -assert-count 1 t:$shiftx +select -assert-count 0 t:$shiftx t:* %D + +#################### + +design -reset +read_verilog <<EOT +module peepopt_shiftmul_1 (output [7:0] y, input [2:0] w); +assign y = 1'b1 >> (w * (3'b110)); +endmodule +EOT + +prep -nokeepdc +equiv_opt peepopt +design -load postopt +clean +select -assert-count 1 t:$shr +select -assert-count 1 t:$mul +select -assert-count 0 t:$shr t:$mul %% t:* %D + +#################### + +design -reset +read_verilog <<EOT +module peepopt_muldiv_0(input [1:0] i, output [1:0] o); +wire [3:0] t; +assign t = i * 3; +assign o = t / 3; +endmodule +EOT + +prep -nokeepdc +equiv_opt peepopt +design -load postopt +clean +select -assert-count 0 t:* + +#################### + +design -reset +read_verilog <<EOT +module peepopt_dffmuxext_signed(input clk, ce, input signed [1:0] i, output reg signed [3:0] o); + always @(posedge clk) if (ce) o <= i; +endmodule +EOT + +prep -nokeepdc +equiv_opt peepopt +design -load postopt +clean +select -assert-count 1 t:$dff r:WIDTH=2 %i +select -assert-count 1 t:$mux r:WIDTH=2 %i +select -assert-count 0 t:$dff t:$mux %% t:* %D diff --git a/tests/xilinx/dsp_simd.ys b/tests/xilinx/dsp_simd.ys new file mode 100644 index 000000000..956952327 --- /dev/null +++ b/tests/xilinx/dsp_simd.ys @@ -0,0 +1,25 @@ +read_verilog <<EOT +module simd(input [12*4-1:0] a, input [12*4-1:0] b, (* use_dsp="simd" *) output [7*12-1:0] o12, (* use_dsp="simd" *) output [2*24-1:0] o24); +generate + genvar i; + // 4 x 12-bit adder + for (i = 0; i < 4; i++) + assign o12[i*12+:12] = a[i*12+:12] + b[i*12+:12]; + // 2 x 24-bit subtract + for (i = 0; i < 2; i++) + assign o24[i*24+:24] = a[i*24+:24] - b[i*24+:24]; +endgenerate +reg [3*12-1:0] ro; +always @* begin + ro[0*12+:12] = a[0*10+:10] + b[0*10+:10]; + ro[1*12+:12] = a[1*10+:10] + b[1*10+:10]; + ro[2*12+:12] = a[2*8+:8] + b[2*8+:8]; +end +assign o12[4*12+:3*12] = ro; +endmodule +EOT + +proc +equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx +design -load postopt +select -assert-count 3 t:DSP48E1 diff --git a/tests/xilinx/macc.v b/tests/xilinx/macc.v new file mode 100644 index 000000000..0bb673316 --- /dev/null +++ b/tests/xilinx/macc.v @@ -0,0 +1,41 @@ +// Signed 40-bit streaming accumulator with 16-bit inputs +// File: HDL_Coding_Techniques/multipliers/multipliers4.v +// +// Source: +// https://www.xilinx.com/support/documentation/sw_manuals/xilinx2014_2/ug901-vivado-synthesis.pdf p.90 +// +module macc # (parameter SIZEIN = 16, SIZEOUT = 40) ( + input clk, ce, sload, + input signed [SIZEIN-1:0] a, b, + output signed [SIZEOUT-1:0] accum_out +); +// Declare registers for intermediate values +reg signed [SIZEIN-1:0] a_reg, b_reg; +reg sload_reg; +reg signed [2*SIZEIN-1:0] mult_reg; +reg signed [SIZEOUT-1:0] adder_out, old_result; +always @* /*(adder_out or sload_reg)*/ begin // Modification necessary to fix sim/synth mismatch + if (sload_reg) + old_result <= 0; + else + // 'sload' is now active (=low) and opens the accumulation loop. + // The accumulator takes the next multiplier output in + // the same cycle. + old_result <= adder_out; +end + +always @(posedge clk) + if (ce) + begin + a_reg <= a; + b_reg <= b; + mult_reg <= a_reg * b_reg; + sload_reg <= sload; + // Store accumulation result into a register + adder_out <= old_result + mult_reg; + end + + // Output accumulation result + assign accum_out = adder_out; + +endmodule diff --git a/tests/xilinx/macc.ys b/tests/xilinx/macc.ys new file mode 100644 index 000000000..de408162c --- /dev/null +++ b/tests/xilinx/macc.ys @@ -0,0 +1,13 @@ +read_verilog macc.v +proc +hierarchy -auto-top +#equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx ### TODO +equiv_opt -run :prove -map +/xilinx/cells_sim.v synth_xilinx +miter -equiv -flatten -make_assert -make_outputs gold gate miter +sat -verify -prove-asserts -seq 10 -show-inputs -show-outputs miter +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd macc # Constrain all select calls below inside the top module +select -assert-count 1 t:BUFG +select -assert-count 1 t:FDRE +select -assert-count 1 t:DSP48E1 +select -assert-none t:BUFG t:FDRE t:DSP48E1 %% t:* %D diff --git a/tests/xilinx/mul_unsigned.v b/tests/xilinx/mul_unsigned.v new file mode 100644 index 000000000..e3713a642 --- /dev/null +++ b/tests/xilinx/mul_unsigned.v @@ -0,0 +1,30 @@ +/* +Example from: https://www.xilinx.com/support/documentation/sw_manuals/xilinx2019_1/ug901-vivado-synthesis.pdf [p. 89]. +*/ + +// Unsigned 16x24-bit Multiplier +// 1 latency stage on operands +// 3 latency stage after the multiplication +// File: multipliers2.v +// +module mul_unsigned (clk, A, B, RES); +parameter WIDTHA = /*16*/ 6; +parameter WIDTHB = /*24*/ 9; +input clk; +input [WIDTHA-1:0] A; +input [WIDTHB-1:0] B; +output [WIDTHA+WIDTHB-1:0] RES; +reg [WIDTHA-1:0] rA; +reg [WIDTHB-1:0] rB; +reg [WIDTHA+WIDTHB-1:0] M [3:0]; +integer i; +always @(posedge clk) + begin + rA <= A; + rB <= B; + M[0] <= rA * rB; + for (i = 0; i < 3; i = i+1) + M[i+1] <= M[i]; + end +assign RES = M[3]; +endmodule diff --git a/tests/xilinx/mul_unsigned.ys b/tests/xilinx/mul_unsigned.ys new file mode 100644 index 000000000..30c034afe --- /dev/null +++ b/tests/xilinx/mul_unsigned.ys @@ -0,0 +1,11 @@ +read_verilog mul_unsigned.v +proc +hierarchy -top mul_unsigned +equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx # equivalency check +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd mul_unsigned # Constrain all select calls below inside the top module +stat +select -assert-count 1 t:BUFG +select -assert-count 1 t:DSP48E1 +select -assert-count 30 t:FDRE +select -assert-none t:DSP48E1 t:FDRE t:BUFG %% t:* %D |