aboutsummaryrefslogtreecommitdiffstats
path: root/mistral/lab.cc
diff options
context:
space:
mode:
Diffstat (limited to 'mistral/lab.cc')
-rw-r--r--mistral/lab.cc969
1 files changed, 969 insertions, 0 deletions
diff --git a/mistral/lab.cc b/mistral/lab.cc
new file mode 100644
index 00000000..abd0fec3
--- /dev/null
+++ b/mistral/lab.cc
@@ -0,0 +1,969 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "design_utils.h"
+#include "log.h"
+#include "nextpnr.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+// This file contains functions related to our custom LAB structure, including creating the LAB bels; checking the
+// legality of LABs; and manipulating LUT inputs and equations
+
+// LAB/ALM structure creation functions
+namespace {
+// Populates one ALM of a LAB with its bels, wires and pips.
+//   arch    - architecture database that receives the new wires, pips and bels
+//   x, y    - grid location of the LAB
+//   z       - index of the ALM inside the LAB (0 starts the LAB's carry chain, 9 ends it)
+//   lab_idx - index into arch->labs of the LAB being built; its LAB-wide control wires (clk/ena/aclr/sclr/sload)
+//             are read here, so they must already be set up
+// Two MISTRAL_COMB bels and four MISTRAL_FF bels are created and recorded in the ALM's data and in each bel's lab_data.
+static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx)
+{
+    auto &lab = arch->labs.at(lab_idx);
+    auto &alm = lab.alms.at(z);
+    // Create the combinational part of ALMs.
+    // There are two of these, for the two LUT outputs, and these also contain the carry chain and associated logic
+    // Each one has all 8 ALM inputs as input pins. In many cases only a subset of these are used; depending on mode;
+    // and the bel-cell pin mappings are used to handle this post-placement without losing flexibility
+    for (int i = 0; i < 2; i++) {
+        // Position of this combinational unit in the LAB-wide carry/share chain
+        const int comb_idx = z * 2 + i;
+        // Carry/share wires are a bit tricky due to all the different permutations
+        WireId carry_in, share_in;
+        WireId carry_out, share_out;
+        if (z == 0 && i == 0) {
+            carry_in = arch->add_wire(x, y, id_CI);
+            share_in = arch->add_wire(x, y, id_SHAREIN);
+            if (y < (arch->getGridDimY() - 1)) {
+                // Carry is split at tile boundary (TTO_DIS bit), add a PIP to represent this.
+                // TODO: what about BTO_DIS, in the middle of the LAB?
+                arch->add_pip(arch->add_wire(x, y + 1, id_CO), carry_in);
+                arch->add_pip(arch->add_wire(x, y + 1, id_SHAREOUT), share_in);
+            }
+        } else {
+            // Output from last combinational unit
+            carry_in = arch->add_wire(x, y, arch->id(stringf("CARRY[%d]", comb_idx - 1)));
+            share_in = arch->add_wire(x, y, arch->id(stringf("SHARE[%d]", comb_idx - 1)));
+        }
+
+        if (z == 9 && i == 1) {
+            // Last unit of the LAB drives the LAB-level carry/share outputs
+            carry_out = arch->add_wire(x, y, id_CO);
+            share_out = arch->add_wire(x, y, id_SHAREOUT);
+        } else {
+            carry_out = arch->add_wire(x, y, arch->id(stringf("CARRY[%d]", comb_idx)));
+            share_out = arch->add_wire(x, y, arch->id(stringf("SHARE[%d]", comb_idx)));
+        }
+
+        BelId bel = arch->add_bel(x, y, arch->id(stringf("ALM%d_COMB%d", z, i)), id_MISTRAL_COMB);
+        // LUT/MUX inputs
+        arch->add_bel_pin(bel, id_A, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::A));
+        arch->add_bel_pin(bel, id_B, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::B));
+        arch->add_bel_pin(bel, id_C, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::C));
+        arch->add_bel_pin(bel, id_D, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::D));
+        arch->add_bel_pin(bel, id_E0, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::E0));
+        arch->add_bel_pin(bel, id_E1, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::E1));
+        arch->add_bel_pin(bel, id_F0, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::F0));
+        arch->add_bel_pin(bel, id_F1, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::F1));
+        // Carry/share chain
+        arch->add_bel_pin(bel, id_CI, PORT_IN, carry_in);
+        arch->add_bel_pin(bel, id_SHAREIN, PORT_IN, share_in);
+        arch->add_bel_pin(bel, id_CO, PORT_OUT, carry_out);
+        arch->add_bel_pin(bel, id_SHAREOUT, PORT_OUT, share_out);
+        // Combinational output
+        alm.comb_out[i] = arch->add_wire(x, y, arch->id(stringf("COMBOUT[%d]", comb_idx)));
+        arch->add_bel_pin(bel, id_COMBOUT, PORT_OUT, alm.comb_out[i]);
+        // Assign indexing
+        alm.lut_bels.at(i) = bel;
+        auto &b = arch->bel_data(bel);
+        b.lab_data.lab = lab_idx;
+        b.lab_data.alm = z;
+        b.lab_data.idx = i;
+    }
+    // Create the control set and E/F selection - which is per pair of FF
+    for (int i = 0; i < 2; i++) {
+        // Wires ('T' names belong to half 0, 'B' names to half 1)
+        alm.sel_clk[i] = arch->add_wire(x, y, arch->id(stringf("CLK%c[%d]", i ? 'B' : 'T', z)));
+        alm.sel_ena[i] = arch->add_wire(x, y, arch->id(stringf("ENA%c[%d]", i ? 'B' : 'T', z)));
+        alm.sel_aclr[i] = arch->add_wire(x, y, arch->id(stringf("ACLR%c[%d]", i ? 'B' : 'T', z)));
+        alm.sel_ef[i] = arch->add_wire(x, y, arch->id(stringf("%cEF[%d]", i ? 'B' : 'T', z)));
+        // Muxes - three CLK/ENA per LAB, two ACLR
+        for (int j = 0; j < 3; j++) {
+            arch->add_pip(lab.clk_wires[j], alm.sel_clk[i]);
+            arch->add_pip(lab.ena_wires[j], alm.sel_ena[i]);
+            if (j < 2)
+                arch->add_pip(lab.aclr_wires[j], alm.sel_aclr[i]);
+        }
+        // E/F pips
+        // Note that the F choice is mirrored, F from the other half is picked
+        arch->add_pip(arch->get_port(CycloneV::LAB, x, y, z, i ? CycloneV::E1 : CycloneV::E0), alm.sel_ef[i]);
+        arch->add_pip(arch->get_port(CycloneV::LAB, x, y, z, i ? CycloneV::F0 : CycloneV::F1), alm.sel_ef[i]);
+    }
+
+    // Create the flipflops and associated routing
+    const CycloneV::port_type_t outputs[4] = {CycloneV::FFT0, CycloneV::FFT1, CycloneV::FFB0, CycloneV::FFB1};
+    // Only the second FF of each half has an additional 'L' output, so this table is indexed by half
+    const CycloneV::port_type_t l_outputs[2] = {CycloneV::FFT1L, CycloneV::FFB1L};
+
+    for (int i = 0; i < 4; i++) {
+        // ALM half (0 or 1) that this flipflop belongs to; it selects the LUT output and control wires used
+        const int half = i / 2;
+        // Position of this flipflop among all flipflops of the LAB
+        const int ff_idx = (z * 4) + i;
+        // FF input, selected by *PKREG*
+        alm.ff_in[i] = arch->add_wire(x, y, arch->id(stringf("FFIN[%d]", ff_idx)));
+        arch->add_pip(alm.comb_out[half], alm.ff_in[i]);
+        arch->add_pip(alm.sel_ef[half], alm.ff_in[i]);
+        // FF bel
+        BelId bel = arch->add_bel(x, y, arch->id(stringf("ALM%d_FF%d", z, i)), id_MISTRAL_FF);
+        arch->add_bel_pin(bel, id_CLK, PORT_IN, alm.sel_clk[half]);
+        arch->add_bel_pin(bel, id_ENA, PORT_IN, alm.sel_ena[half]);
+        arch->add_bel_pin(bel, id_ACLR, PORT_IN, alm.sel_aclr[half]);
+        arch->add_bel_pin(bel, id_SCLR, PORT_IN, lab.sclr_wire);
+        arch->add_bel_pin(bel, id_SLOAD, PORT_IN, lab.sload_wire);
+        arch->add_bel_pin(bel, id_DATAIN, PORT_IN, alm.ff_in[i]);
+        arch->add_bel_pin(bel, id_SDATA, PORT_IN, alm.sel_ef[half]);
+
+        // FF output
+        alm.ff_out[i] = arch->add_wire(x, y, arch->id(stringf("FFOUT[%d]", ff_idx)));
+        arch->add_bel_pin(bel, id_Q, PORT_OUT, alm.ff_out[i]);
+        // Output mux (*DFF*)
+        WireId out = arch->get_port(CycloneV::LAB, x, y, z, outputs[i]);
+        arch->add_pip(alm.ff_out[i], out);
+        arch->add_pip(alm.comb_out[half], out);
+        // 'L' output mux where applicable
+        if (i == 1 || i == 3) {
+            WireId l_out = arch->get_port(CycloneV::LAB, x, y, z, l_outputs[half]);
+            arch->add_pip(alm.ff_out[i], l_out);
+            arch->add_pip(alm.comb_out[half], l_out);
+        }
+
+        alm.ff_bels.at(i) = bel;
+        auto &b = arch->bel_data(bel);
+        b.lab_data.lab = lab_idx;
+        b.lab_data.alm = z;
+        b.lab_data.idx = i;
+    }
+}
+} // namespace
+
+// Appends a new LAB record for grid location (x, y), wires up its LAB-wide control signals and then creates its
+// ten ALMs.
+void Arch::create_lab(int x, int y)
+{
+    // The new LAB takes the next free index in `labs`
+    uint32_t lab_idx = labs.size();
+    labs.emplace_back();
+    auto &lab = labs.back();
+
+    // Shorthand for the LAB-level DATAIN port with the given index
+    auto datain = [&](int index) { return get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, index); };
+
+    // The control set configuration built below is deliberately a subset of what the hardware can do; it errs on
+    // the side of caution due to incomplete documentation.
+
+    // Clocks - hardcoded to the CLKA choices, because CLKA and CLKB both coming from general routing causes
+    // unexpected permutations
+    for (int clk = 0; clk < 3; clk++) {
+        lab.clk_wires[clk] = add_wire(x, y, id(stringf("CLK%d", clk)));
+        // dedicated routing
+        add_pip(get_port(CycloneV::LAB, x, y, -1, CycloneV::CLKIN, 0), lab.clk_wires[clk]);
+        // general routing
+        add_pip(datain(0), lab.clk_wires[clk]);
+    }
+
+    // Enables - while it looks from the config like there are choices for these, it seems like EN0_SEL actually
+    // selects SCLR not ENA0 and EN1_SEL actually selects SLOAD?
+    const int ena_sources[3] = {2, 3, 0};
+    for (int k = 0; k < 3; k++)
+        lab.ena_wires[k] = datain(ena_sources[k]);
+
+    // ACLRs - only general routing is considered for now
+    const int aclr_sources[2] = {3, 2};
+    for (int k = 0; k < 2; k++)
+        lab.aclr_wires[k] = datain(aclr_sources[k]);
+
+    // SCLR and SLOAD - as noted for the enables, these might be selectable using the "EN*_SEL" bits, but play it
+    // safe for now
+    lab.sclr_wire = datain(3);
+    lab.sload_wire = datain(1);
+
+    for (int z = 0; z < 10; z++)
+        create_alm(this, x, y, z, lab_idx);
+}
+
+// Cell handling and annotation functions
+namespace {
+// Builds the ControlSig (net plus inversion flag) for `port` of `cell`.
+// With explicit_const set, an unconnected port is reported as being tied to the packer's constant-1 net.
+ControlSig get_ctrlsig(const Context *ctx, const CellInfo *cell, IdString port, bool explicit_const = false)
+{
+    ControlSig sig;
+    sig.net = get_net_or_empty(cell, port);
+    if (explicit_const && sig.net == nullptr) {
+        // For ENA, 1 (and 0) are explicit control set choices even though they aren't routed, as "no ENA" still
+        // consumes a clock+ENA pair
+        CellPinState st = PIN_1;
+        const IdString const_net = (st == PIN_1) ? ctx->id("$PACKER_VCC_NET") : ctx->id("$PACKER_GND_NET");
+        sig.net = ctx->nets.at(const_net).get();
+    }
+    // Only a pin explicitly marked PIN_INV counts as inverted; a pin without pin data is non-inverted
+    const auto pin = cell->pin_data.find(port);
+    sig.inverted = (pin != cell->pin_data.end()) && (pin->second.state == PIN_INV);
+    return sig;
+}
+} // namespace
+
+// Returns true if `cell_type` is a combinational cell type, i.e. one that is placed at a MISTRAL_COMB location.
+bool Arch::is_comb_cell(IdString cell_type) const
+{
+    const auto type = cell_type.index;
+    // Plain LUTs of every supported size
+    const bool is_lut = (type == ID_MISTRAL_ALUT6) || (type == ID_MISTRAL_ALUT5) || (type == ID_MISTRAL_ALUT4) ||
+                        (type == ID_MISTRAL_ALUT3) || (type == ID_MISTRAL_ALUT2);
+    // Inverters, constant generators and arithmetic cells
+    const bool is_special =
+            (type == ID_MISTRAL_NOT) || (type == ID_MISTRAL_CONST) || (type == ID_MISTRAL_ALUT_ARITH);
+    return is_lut || is_special;
+}
+
+// Fills in cell->combInfo for a combinational cell: carry flags, the nets on its logical LUT inputs, the number of
+// LUT inputs and LUT bits it occupies, how many inputs are actually connected, and its output net.
+// Calls log_error for a cell type that is not a known combinational type.
+void Arch::assign_comb_info(CellInfo *cell) const
+{
+    cell->combInfo.is_carry = false;
+    cell->combInfo.is_shared = false;
+    cell->combInfo.is_extended = false;
+    cell->combInfo.carry_start = false;
+    cell->combInfo.carry_end = false;
+    cell->combInfo.chain_shared_input_count = 0;
+
+    if (cell->type == id_MISTRAL_ALUT_ARITH) {
+        cell->combInfo.is_carry = true;
+        cell->combInfo.lut_input_count = 5;
+        cell->combInfo.lut_bits_count = 32;
+
+        // This is a special case in terms of naming
+        const std::array<IdString, 5> arith_pins{id_A, id_B, id_C, id_D0, id_D1};
+        {
+            int i = 0;
+            for (auto pin : arith_pins) {
+                cell->combInfo.lut_in[i++] = get_net_or_empty(cell, pin);
+            }
+        }
+
+        const NetInfo *ci = get_net_or_empty(cell, id_CI);
+        const NetInfo *co = get_net_or_empty(cell, id_CO);
+
+        cell->combInfo.comb_out = get_net_or_empty(cell, id_SO);
+        // A chain starts where nothing drives the carry input and ends where nothing uses the carry output
+        cell->combInfo.carry_start = (ci == nullptr) || (ci->driver.cell == nullptr);
+        cell->combInfo.carry_end = (co == nullptr) || (co->users.empty());
+
+        // Compute cross-ALM routing sharing - only check the z=0 case inside ALMs
+        if (cell->constr_z > 0 && ((cell->constr_z % 2) == 0) && ci) {
+            const CellInfo *prev = ci->driver.cell;
+            if (prev != nullptr) {
+                // Count the connected inputs that carry the same net as the same-named pin of the previous cell
+                for (int i = 0; i < 5; i++) {
+                    const NetInfo *a = get_net_or_empty(cell, arith_pins[i]);
+                    if (a == nullptr)
+                        continue;
+                    const NetInfo *b = get_net_or_empty(prev, arith_pins[i]);
+                    if (a == b)
+                        ++cell->combInfo.chain_shared_input_count;
+                }
+            }
+        }
+
+    } else {
+        // Each case records its highest-numbered input and falls through to the next smaller LUT, so entering at
+        // ALUTn counts exactly n inputs
+        cell->combInfo.lut_input_count = 0;
+        switch (cell->type.index) {
+        case ID_MISTRAL_ALUT6:
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[5] = get_net_or_empty(cell, id_F);
+            [[fallthrough]];
+        case ID_MISTRAL_ALUT5:
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[4] = get_net_or_empty(cell, id_E);
+            [[fallthrough]];
+        case ID_MISTRAL_ALUT4:
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[3] = get_net_or_empty(cell, id_D);
+            [[fallthrough]];
+        case ID_MISTRAL_ALUT3:
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[2] = get_net_or_empty(cell, id_C);
+            [[fallthrough]];
+        case ID_MISTRAL_ALUT2:
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[1] = get_net_or_empty(cell, id_B);
+            [[fallthrough]];
+        case ID_MISTRAL_BUF: // used to route through to FFs etc
+        case ID_MISTRAL_NOT: // used for inverters that map to LUTs
+            ++cell->combInfo.lut_input_count;
+            cell->combInfo.lut_in[0] = get_net_or_empty(cell, id_A);
+            [[fallthrough]];
+        case ID_MISTRAL_CONST:
+            // MISTRAL_CONST is a nextpnr-inserted cell type for 0-input, constant-generating LUTs
+            break;
+        default:
+            log_error("unexpected combinational cell type %s\n", getCtx()->nameOf(cell->type));
+        }
+        // Note that this relationship won't hold for extended mode, when that is supported
+        cell->combInfo.lut_bits_count = (1 << cell->combInfo.lut_input_count);
+        // Non-arithmetic cells drive COMBOUT from their Q port. Recording it here lets the ALM checks recognise a
+        // flipflop that is fed directly by the LUT it is paired with.
+        cell->combInfo.comb_out = get_net_or_empty(cell, id_Q);
+    }
+    // Only inputs that are actually connected to a net count as used
+    cell->combInfo.used_lut_input_count = 0;
+    for (int i = 0; i < cell->combInfo.lut_input_count; i++)
+        if (cell->combInfo.lut_in[i])
+            ++cell->combInfo.used_lut_input_count;
+}
+
+// Fills in cell->ffInfo for a flipflop cell: its control set (CLK, ENA, ACLR, SCLR, SLOAD) and its two data nets.
+void Arch::assign_ff_info(CellInfo *cell) const
+{
+    auto &ff = cell->ffInfo;
+    auto &ctrl = ff.ctrlset;
+
+    ctrl.clk = get_ctrlsig(getCtx(), cell, id_CLK);
+    // An unconnected ENA is still recorded as an explicit constant, see get_ctrlsig
+    ctrl.ena = get_ctrlsig(getCtx(), cell, id_ENA, true);
+    ctrl.aclr = get_ctrlsig(getCtx(), cell, id_ACLR);
+    ctrl.sclr = get_ctrlsig(getCtx(), cell, id_SCLR);
+    ctrl.sload = get_ctrlsig(getCtx(), cell, id_SLOAD);
+
+    // If SCLR is used, but SLOAD isn't, then it seems like we need to pretend as if SLOAD is connected GND (so set
+    // [BT]SLOAD_EN inside the ALMs, and clear SLOAD_INV)
+    const bool sclr_without_sload = (ctrl.sclr.net != nullptr) && (ctrl.sload.net == nullptr);
+    if (sclr_without_sload) {
+        ctrl.sload.net = nets.at(id("$PACKER_GND_NET")).get();
+        ctrl.sload.inverted = false;
+    }
+
+    ff.sdata = get_net_or_empty(cell, id_SDATA);
+    ff.datain = get_net_or_empty(cell, id_DATAIN);
+}
+
+// Validity checking functions
+// Returns true if the cells currently bound to ALM `alm` of LAB `lab` can share that ALM: the LUTs must fit the
+// ALM's LUT storage and inputs, and every flipflop must have a compatible control set and a way to receive its
+// data inputs.
+bool Arch::is_alm_legal(uint32_t lab, uint8_t alm) const
+{
+    auto &alm_data = labs.at(lab).alms.at(alm);
+    // Look the bound cells up once
+    std::array<const CellInfo *, 2> luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])};
+    std::array<const CellInfo *, 4> ffs{getBoundBelCell(alm_data.ff_bels[0]), getBoundBelCell(alm_data.ff_bels[1]),
+                                        getBoundBelCell(alm_data.ff_bels[2]), getBoundBelCell(alm_data.ff_bels[3])};
+
+    // TODO: for more complex modes like extended/arithmetic, it might not always be possible for any LUT input to
+    // map to any of the ALM half inputs; particularly shared and extended mode will need more thought and probably
+    // for this to be revisited
+    int lut_input_sum = 0;
+    int lut_bit_sum = 0;
+    for (const CellInfo *lut : luts) {
+        if (lut == nullptr)
+            continue;
+        lut_input_sum += lut->combInfo.lut_input_count;
+        lut_bit_sum += lut->combInfo.lut_bits_count;
+    }
+
+    // An ALM only has 64 bits of storage. In theory some of these cases might be legal because of overlap between
+    // the two functions, but the current placer is unlikely to stumble upon these cases frequently without anything
+    // to guide it, and the cost of checking them here almost certainly outweighs any marginal benefit in supporting
+    // them, at least for now.
+    if (lut_bit_sum > 64)
+        return false;
+
+    if (lut_input_sum > 8) {
+        NPNR_ASSERT(luts[0] && luts[1]); // something has gone badly wrong if this fails!
+        // More than 8 inputs in total is only acceptable if enough of them are common to both LUTs.
+        // The quadratic search is fine given how few inputs there are.
+        int common_inputs = 0;
+        for (int i = 0; i < luts[1]->combInfo.lut_input_count; i++) {
+            const NetInfo *sig = luts[1]->combInfo.lut_in[i];
+            bool also_on_lut0 = false;
+            for (int j = 0; j < luts[0]->combInfo.lut_input_count && !also_on_lut0; j++)
+                also_on_lut0 = (sig == luts[0]->combInfo.lut_in[j]);
+            if (also_on_lut0)
+                ++common_inputs;
+        }
+        if ((lut_input_sum - common_inputs) > 8)
+            return false;
+    }
+
+    // Never set anywhere in this function, so the carry restriction on route-through below is currently inactive
+    bool carry_mode = false;
+
+    // No mixing of carry and non-carry
+    if (luts[0] && luts[1] && luts[0]->combInfo.is_carry != luts[1]->combInfo.is_carry)
+        return false;
+
+    // For each ALM half; check FF control set sharing and input routeability
+    for (int half = 0; half < 2; half++) {
+        // There are two ways to route from the fabric into FF data - either routing through a LUT or using the E/F
+        // signals and SLOAD=1 (*PKREF*)
+        bool lut_route_thru_free = !luts[half] && !carry_mode && (lut_input_sum < 8) && (lut_bit_sum < 64);
+        // E/F is treated as free when the mirrored LUT is absent or has at most 2 connected inputs - this is
+        // conservative and sharing can probably improve it. (1 - half) because the F input to EF_SEL is mirrored.
+        const CellInfo *mirrored_lut = luts[1 - half];
+        bool ef_free = (mirrored_lut == nullptr) || (mirrored_lut->combInfo.used_lut_input_count <= 2);
+
+        // Control set of the first flipflop seen in this half
+        bool seen_ff = false;
+        FFControlSet half_ctrlset;
+
+        for (int slot = 0; slot < 2; slot++) {
+            const CellInfo *ff = ffs[half * 2 + slot];
+            if (!ff)
+                continue;
+            if (slot == 1)
+                return false; // TODO: why are these FFs broken?
+            if (!seen_ff) {
+                half_ctrlset = ff->ffInfo.ctrlset;
+            } else if (half_ctrlset != ff->ffInfo.ctrlset) {
+                // Two FFs in the same half with an incompatible control set
+                return false;
+            }
+            // SDATA must use the E/F input
+            // TODO: rare case of two FFs with the same SDATA in the same ALM half
+            if (ff->ffInfo.sdata) {
+                if (!ef_free)
+                    return false;
+                ef_free = false;
+            }
+            // DATAIN needs a path from the fabric unless it is driven by this half's own LUT
+            const bool fed_by_own_lut = luts[half] && (ff->ffInfo.datain == luts[half]->combInfo.comb_out);
+            if (ff->ffInfo.datain && !fed_by_own_lut) {
+                if (lut_route_thru_free)
+                    lut_route_thru_free = false;
+                else if (ef_free)
+                    ef_free = false;
+                else
+                    return false;
+            }
+            seen_ff = true;
+        }
+    }
+
+    return true;
+}
+
+// Recomputes alm_data.unique_input_count for ALM `alm` of LAB `lab`: an estimate of how many distinct routed
+// inputs the cells currently bound to the ALM need.
+void Arch::update_alm_input_count(uint32_t lab, uint8_t alm)
+{
+    // TODO: duplication with is_alm_legal
+    auto &alm_data = labs.at(lab).alms.at(alm);
+    // Look the bound cells up once
+    std::array<const CellInfo *, 2> luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])};
+    std::array<const CellInfo *, 4> ffs{getBoundBelCell(alm_data.ff_bels[0]), getBoundBelCell(alm_data.ff_bels[1]),
+                                        getBoundBelCell(alm_data.ff_bels[2]), getBoundBelCell(alm_data.ff_bels[3])};
+
+    // Connected LUT inputs, less those already accounted for through the carry chain
+    int lut_inputs = 0;
+    for (const CellInfo *lut : luts) {
+        if (lut != nullptr)
+            lut_inputs += lut->combInfo.used_lut_input_count - lut->combInfo.chain_shared_input_count;
+    }
+
+    // Nets present on both LUTs. Only 2 inputs have guaranteed sharing, without routeability based LUT permutation
+    // at least, so the search stops once two have been found.
+    int common_inputs = 0;
+    if (luts[0] && luts[1]) {
+        for (int i = 0; i < luts[1]->combInfo.lut_input_count && common_inputs < 2; i++) {
+            const NetInfo *sig = luts[1]->combInfo.lut_in[i];
+            if (!sig)
+                continue;
+            bool also_on_lut0 = false;
+            for (int j = 0; j < luts[0]->combInfo.lut_input_count && !also_on_lut0; j++)
+                also_on_lut0 = (sig == luts[0]->combInfo.lut_in[j]);
+            if (also_on_lut0)
+                ++common_inputs;
+        }
+    }
+
+    int input_count = std::max(0, lut_inputs - common_inputs);
+    for (int i = 0; i < 4; i++) {
+        const CellInfo *ff = ffs[i];
+        if (!ff)
+            continue;
+        if (ff->ffInfo.sdata)
+            ++input_count;
+        // FF input doesn't consume routing resources if driven by associated LUT
+        const CellInfo *own_lut = luts[i / 2];
+        const bool fed_by_own_lut = own_lut && (ff->ffInfo.datain == own_lut->combInfo.comb_out);
+        if (ff->ffInfo.datain && !fed_by_own_lut)
+            ++input_count;
+    }
+    alm_data.unique_input_count = input_count;
+}
+
+// Returns true if the summed unique_input_count of the ten ALMs of LAB `lab` stays within the LAB's input budget.
+bool Arch::check_lab_input_count(uint32_t lab) const
+{
+    // There are only 46 TD signals available to route signals from general routing to the ALM input. Currently, we
+    // check the total sum of ALM inputs is at most 42; 46 minus 4 FF control inputs. This is a conservative check
+    // for several reasons, because LD signals are also available for feedback routing from ALM output to input, and
+    // because TD signals may be shared if the same net routes to multiple ALMs. But these cases will need careful
+    // handling and LUT permutation during routing to be useful; and in any event conservative LAB packing will help
+    // nextpnr's currently perfunctory place and route algorithms to achieve satisfactory runtimes.
+    const int input_budget = 42;
+    auto &lab_data = labs.at(lab);
+    int inputs_in_use = 0;
+    for (int alm = 0; alm < 10; alm++)
+        inputs_in_use += lab_data.alms.at(alm).unique_input_count;
+    return !(inputs_in_use > input_budget);
+}
+
+namespace {
+// Tries to fit `sig` into the single slot `sig_set`.
+// Succeeds if `sig` has no net, if the slot already holds an equal signal, or if the slot has no net yet (in which
+// case `sig` is stored in it). Returns false if the slot is taken by a different signal.
+bool check_assign_sig(ControlSig &sig_set, const ControlSig &sig)
+{
+    if (sig.net == nullptr || sig_set == sig)
+        return true;
+    if (sig_set.net != nullptr)
+        return false;
+    sig_set = sig;
+    return true;
+}
+
+// Tries to fit `sig` into one of the N slots of `sig_set`, scanning from slot 0.
+// Succeeds if `sig` has no net, or at the first slot that either equals `sig` or has no net yet (that slot then
+// takes `sig`). Returns false if every slot holds a different signal.
+template <size_t N> bool check_assign_sig(std::array<ControlSig, N> &sig_set, const ControlSig &sig)
+{
+    if (sig.net == nullptr)
+        return true;
+    for (auto &slot : sig_set) {
+        if (slot == sig)
+            return true;
+        if (slot.net == nullptr) {
+            slot = sig;
+            return true;
+        }
+    }
+    return false;
+}
+
+// DATAIN mapping rules - which LAB DATAIN signals can be used for ENA and ACLR, listed per ENA/ACLR choice
+static constexpr std::array<int, 3> ena_datain{2, 3, 0}; // same DATAIN indices, in order, as lab.ena_wires[0..2] in Arch::create_lab
+static constexpr std::array<int, 2> aclr_datain{3, 2}; // same DATAIN indices, in order, as lab.aclr_wires[0..1] in Arch::create_lab
+
+// Collects the control signals used by all flipflops of a LAB and checks that they fit the LAB-wide resources.
+// After a successful run(), `datain` holds the signal assigned to each of the four LAB DATAIN inputs.
+struct LabCtrlSetWorker
+{
+    // Signals of which a LAB may have one each
+    ControlSig clk{}, sload{}, sclr{};
+    // Up to two distinct ACLRs and three distinct ENAs per LAB
+    std::array<ControlSig, 2> aclr{};
+    std::array<ControlSig, 3> ena{};
+
+    // Occupant of each LAB-wide DATAIN signal
+    std::array<ControlSig, 4> datain{};
+
+    // Returns true if the flipflops bound in LAB `lab` have a legal combination of control sets.
+    bool run(const Arch *arch, uint32_t lab)
+    {
+        // Strictly speaking the constraint is up to 2 unique CLK and 3 CLK+ENA pairs. For now we simplify this to
+        // 1 CLK and 3 ENA though.
+        for (uint8_t alm = 0; alm < 10; alm++) {
+            for (uint8_t slot = 0; slot < 4; slot++) {
+                const CellInfo *ff = arch->getBoundBelCell(arch->labs.at(lab).alms.at(alm).ff_bels.at(slot));
+                if (ff == nullptr)
+                    continue;
+
+                const auto &cs = ff->ffInfo.ctrlset;
+                // Evaluated left to right; stops at the first signal that does not fit
+                const bool fits = check_assign_sig(clk, cs.clk) && check_assign_sig(sload, cs.sload) &&
+                                  check_assign_sig(sclr, cs.sclr) && check_assign_sig(aclr, cs.aclr) &&
+                                  check_assign_sig(ena, cs.ena);
+                if (!fits)
+                    return false;
+            }
+        }
+
+        // Check for overuse of the shared, LAB-wide datain signals
+        // CLK only needs DATAIN[0] if it's not global
+        const bool clk_on_datain = (clk.net != nullptr) && !clk.net->is_global;
+        if (clk_on_datain && !check_assign_sig(datain[0], clk))
+            return false;
+        if (!check_assign_sig(datain[1], sload))
+            return false;
+        if (!check_assign_sig(datain[3], sclr))
+            return false;
+
+        // Tries each DATAIN index in `choices` in order; true as soon as `sig` fits one of them
+        auto place_on_datain = [this](const auto &choices, const ControlSig &sig) {
+            for (int index : choices)
+                if (check_assign_sig(datain[index], sig))
+                    return true;
+            return false;
+        };
+
+        // Each ACLR must find a free (or matching) ACLR-capable DATAIN
+        // TODO: ACLR could be global, too
+        for (const auto &aclr_sig : aclr) {
+            if (!place_on_datain(aclr_datain, aclr_sig))
+                return false;
+        }
+        // Each ENA must find a free (or matching) ENA-capable DATAIN
+        for (const auto &ena_sig : ena) {
+            if (!place_on_datain(ena_datain, ena_sig))
+                return false;
+        }
+        return true;
+    }
+};
+
+}; // namespace
+
+// Returns true if the control sets of the flipflops bound in LAB `lab` pass LabCtrlSetWorker's checks.
+bool Arch::is_lab_ctrlset_legal(uint32_t lab) const
+{
+    return LabCtrlSetWorker().run(this, lab);
+}
+
+// Runs the pre-routing fix-ups on every LAB: control set assignment first, then the input assignment of each of
+// its ten ALMs.
+void Arch::lab_pre_route()
+{
+    log_info("Preparing LABs for routing...\n");
+    uint32_t lab_idx = 0;
+    while (lab_idx < labs.size()) {
+        assign_control_sets(lab_idx);
+        for (uint8_t alm_idx = 0; alm_idx < 10; ++alm_idx)
+            reassign_alm_inputs(lab_idx, alm_idx);
+        ++lab_idx;
+    }
+}
+
+// Decides, for every flipflop bound in LAB `lab`, which LAB-wide CLK/ENA and ACLR choice it uses, and reserves the
+// matching routes from the LAB control wires to the flipflop's pins.
+// Asserts that the LAB's control sets are legal according to LabCtrlSetWorker.
+void Arch::assign_control_sets(uint32_t lab)
+{
+    // Set up reservations for checkPipAvail for control set signals
+    // This will be needed because clock and CE are routed together and must be kept together, there isn't free choice
+    // e.g. CLK0 & ENA0 must be use for one control set, and CLK1 & ENA1 for another, they can't be mixed and matched
+    // Similarly for how inverted & noninverted variants must be kept separate
+    LabCtrlSetWorker worker;
+    bool legal = worker.run(this, lab);
+    NPNR_ASSERT(legal);
+    auto &lab_data = labs.at(lab);
+
+    for (int j = 0; j < 2; j++) {
+        lab_data.aclr_used[j] = false;
+    }
+
+    for (uint8_t alm = 0; alm < 10; alm++) {
+        auto &alm_data = lab_data.alms.at(alm);
+        for (uint8_t i = 0; i < 4; i++) {
+            BelId ff_bel = alm_data.ff_bels.at(i);
+            const CellInfo *ff = getBoundBelCell(ff_bel);
+            if (ff == nullptr)
+                continue;
+            ControlSig ena_sig = ff->ffInfo.ctrlset.ena;
+            WireId clk_wire = getBelPinWire(ff_bel, id_CLK);
+            WireId ena_wire = getBelPinWire(ff_bel, id_ENA);
+            // Pick the first ENA choice whose DATAIN was assigned this flipflop's ENA signal by the worker
+            for (int j = 0; j < 3; j++) {
+                if (ena_sig == worker.datain[ena_datain[j]]) {
+                    if (getCtx()->debug) {
+                        // Report the chosen set index j (not the flipflop index)
+                        log_info("Assigned CLK/ENA set %d to FF %s (%s)\n", j, nameOf(ff), getCtx()->nameOfBel(ff_bel));
+                    }
+                    // TODO: lock clock according to ENA choice, too, when we support two clocks per ALM
+                    reserve_route(lab_data.clk_wires[0], clk_wire);
+                    reserve_route(lab_data.ena_wires[j], ena_wire);
+                    // The choice is stored per ALM half (pair of flipflops)
+                    alm_data.clk_ena_idx[i / 2] = j;
+                    break;
+                }
+            }
+
+            ControlSig aclr_sig = ff->ffInfo.ctrlset.aclr;
+            WireId aclr_wire = getBelPinWire(ff_bel, id_ACLR);
+            // Same search for the ACLR choice
+            for (int j = 0; j < 2; j++) {
+                // TODO: could be global ACLR, too
+                if (aclr_sig == worker.datain[aclr_datain[j]]) {
+                    if (getCtx()->debug) {
+                        // Report the chosen set index j (not the flipflop index)
+                        log_info("Assigned ACLR set %d to FF %s (%s)\n", j, nameOf(ff), getCtx()->nameOfBel(ff_bel));
+                    }
+                    reserve_route(lab_data.aclr_wires[j], aclr_wire);
+                    lab_data.aclr_used[j] = (aclr_sig.net != nullptr);
+                    alm_data.aclr_idx[i / 2] = j;
+                    break;
+                }
+            }
+        }
+    }
+}
+
+namespace {
+// Returns the name of logical LUT input pin number i of `cell`. Arithmetic cells have five inputs named
+// A, B, C, D0, D1; all other cells use A..F. An out-of-range i makes std::array::at throw.
+static IdString get_lut_pin(CellInfo *cell, int i)
+{
+    if (cell->type == id_MISTRAL_ALUT_ARITH) {
+        const std::array<IdString, 5> arith_names{id_A, id_B, id_C, id_D0, id_D1};
+        return arith_names.at(i);
+    }
+    const std::array<IdString, 6> lut_names{id_A, id_B, id_C, id_D, id_E, id_F};
+    return lut_names.at(i);
+}
+
+// Maps the connected logical inputs of a LUT6 `cell`, in logical pin order, onto the physical pins
+// A, B, C, D, E, F. `lut` selects the E1/F1 pins when it is 1 and the E0/F0 pins otherwise.
+// Unconnected or missing ports are skipped and keep whatever bel pin mapping they had.
+static void assign_lut6_inputs(CellInfo *cell, int lut)
+{
+    const bool second_lut = (lut == 1);
+    std::array<IdString, 6> phys_pins{id_A, id_B, id_C, id_D, second_lut ? id_E1 : id_E0, second_lut ? id_F1 : id_F0};
+    int next_phys = 0;
+    for (int i = 0; i < 6; i++) {
+        IdString log = get_lut_pin(cell, i);
+        const auto port = cell->ports.find(log);
+        if (port == cell->ports.end() || port->second.net == nullptr)
+            continue;
+        auto &bel_pins = cell->pin_data[log].bel_pins;
+        bel_pins.clear();
+        bel_pins.push_back(phys_pins.at(next_phys++));
+    }
+}
+} // namespace
+
+// Based on the usage of LUTs inside ALM `alm` of LAB `lab`, sets up the cell-bel pin map for the combinational
+// cells in the ALM so that each physical bel pin is only used for one net and the logical functions can be
+// implemented correctly. Also inserts route-through LUTs to legalise flipflop inputs as needed.
+void Arch::reassign_alm_inputs(uint32_t lab, uint8_t alm)
+{
+    auto &alm_data = labs.at(lab).alms.at(alm);
+    alm_data.l6_mode = false;
+    std::array<CellInfo *, 2> luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])};
+    std::array<CellInfo *, 4> ffs{getBoundBelCell(alm_data.ff_bels[0]), getBoundBelCell(alm_data.ff_bels[1]),
+                                  getBoundBelCell(alm_data.ff_bels[2]), getBoundBelCell(alm_data.ff_bels[3])};
+
+    // Currently we treat LUT6s as a special case, as they never share inputs
+    for (int half = 0; half < 2; half++) {
+        CellInfo *lut = luts[half];
+        if (lut == nullptr || lut->type != id_MISTRAL_ALUT6)
+            continue;
+        alm_data.l6_mode = true;
+        NPNR_ASSERT(luts[1 - half] == nullptr); // only allow one LUT6 per ALM and no other LUTs
+        assign_lut6_inputs(lut, half);
+    }
+
+    if (!alm_data.l6_mode) {
+        // In L5 mode; which is what we use in this case
+        //  - A and B are shared
+        //  - C, E0, and F0 are exclusive to the top LUT5 section
+        //  - D, E1, and F1 are exclusive to the bottom LUT5 section
+        // First find up to two nets that appear on both LUTs; the map value is the order of discovery
+        // (0 is given pin A, 1 is given pin B)
+        std::unordered_map<IdString, int> shared_nets;
+        if (luts[0] && luts[1]) {
+            const int top_inputs = luts[0]->combInfo.lut_input_count;
+            const int bottom_inputs = luts[1]->combInfo.lut_input_count;
+            // Both loops stop as soon as two shared nets are known
+            for (int i = 0; i < top_inputs && shared_nets.size() < 2; i++) {
+                const auto *candidate = luts[0]->combInfo.lut_in[i];
+                if (candidate == nullptr)
+                    continue;
+                for (int j = 0; j < bottom_inputs && shared_nets.size() < 2; j++) {
+                    if (candidate != luts[1]->combInfo.lut_in[j])
+                        continue;
+                    if (shared_nets.count(candidate->name))
+                        continue;
+                    int idx = int(shared_nets.size());
+                    shared_nets[candidate->name] = idx;
+                }
+            }
+        }
+        // A and B can be used for half-specific nets if not assigned to shared nets
+        bool a_avail = shared_nets.size() == 0, b_avail = shared_nets.size() <= 1;
+        // Do the actual port assignment
+        for (int half = 0; half < 2; half++) {
+            CellInfo *lut = luts[half];
+            if (!lut)
+                continue;
+            const bool bottom = (half == 1);
+            // Work out which physical ports are available, in order of preference
+            std::vector<IdString> free_ports;
+            // D/C always available and dedicated to the half, in L5 mode
+            free_ports.push_back(bottom ? id_D : id_C);
+            // In arithmetic mode, Ei can only be used for D0 and Fi can only be used for D1
+            // otherwise, these are general and dedicated to one half
+            if (!lut->combInfo.is_carry) {
+                free_ports.push_back(bottom ? id_E1 : id_E0);
+                free_ports.push_back(bottom ? id_F1 : id_F0);
+            }
+            // A and B might be used for shared signals, or already used by the other half
+            if (b_avail)
+                free_ports.push_back(id_B);
+            if (a_avail)
+                free_ports.push_back(id_A);
+            int next_free = 0;
+
+            for (int j = 0; j < lut->combInfo.lut_input_count; j++) {
+                IdString log = get_lut_pin(lut, j);
+                // Any previous mapping of this pin is dropped, even if the pin turns out to be disconnected
+                auto &bel_pins = lut->pin_data[log].bel_pins;
+                bel_pins.clear();
+
+                NetInfo *net = get_net_or_empty(lut, log);
+                // Disconnected inputs don't need to be allocated a pin, because the router won't be routing these
+                if (net == nullptr)
+                    continue;
+
+                IdString phys;
+                if (shared_nets.count(net->name)) {
+                    // This pin is to be allocated one of the shared nets
+                    phys = shared_nets.at(net->name) ? id_B : id_A;
+                } else if (log == id_D0) {
+                    // Arithmetic: E is reserved for D0
+                    phys = bottom ? id_E1 : id_E0;
+                } else if (log == id_D1) {
+                    // Arithmetic: F is reserved for D1
+                    phys = bottom ? id_F1 : id_F0;
+                } else {
+                    // Allocate from the general pool of available physical pins
+                    phys = free_ports.at(next_free++);
+                    // Mark A/B unavailable for the other LUT, if needed
+                    if (phys == id_A)
+                        a_avail = false;
+                    else if (phys == id_B)
+                        b_avail = false;
+                }
+                bel_pins.push_back(phys);
+            }
+        }
+    }
+
+    // FF route-through insertion
+    for (int half = 0; half < 2; half++) {
+        // FF route-through will never be inserted if LUT is used
+        if (luts[half])
+            continue;
+        for (int slot = 0; slot < 2; slot++) {
+            CellInfo *ff = ffs[half * 2 + slot];
+            if (!ff || !ff->ffInfo.datain || alm_data.l6_mode)
+                continue;
+            CellInfo *rt_lut = createCell(id(stringf("%s$ROUTETHRU", nameOf(ff))), id_MISTRAL_BUF);
+            rt_lut->addInput(id_A);
+            rt_lut->addOutput(id_Q);
+            // Disconnect the original data input to the FF, and connect it to the route-thru LUT instead
+            NetInfo *datain = get_net_or_empty(ff, id_DATAIN);
+            disconnect_port(getCtx(), ff, id_DATAIN);
+            connect_port(getCtx(), datain, rt_lut, id_A);
+            connect_ports(getCtx(), rt_lut, id_Q, ff, id_DATAIN);
+            // Assign route-thru LUT physical ports, input goes to the first half-specific input
+            rt_lut->pin_data[id_A].bel_pins.push_back(half ? id_D : id_C);
+            rt_lut->pin_data[id_Q].bel_pins.push_back(id_COMBOUT);
+            assign_comb_info(rt_lut);
+            // Place the route-thru LUT at the relevant combinational bel
+            bindBel(alm_data.lut_bels[half], rt_lut, STRENGTH_STRONG);
+            // At most one route-through per half
+            break;
+        }
+    }
+
+    // TODO: in the future, as well as the reassignment here we will also have pseudo PIPs in front of the ALM so that
+    // the router can permute LUTs for routeability; too. Here we will need to lock out some of those PIPs depending on
+    // the usage of the ALM, as not all inputs are always interchangeable.
+}
+
+// Default mapping from cell pins to bel pins. It is only used to provide estimates during placement: it will contain
+// errors and overlaps, and the correct mapping is resolved between placement and routing.
+const std::unordered_map<IdString, IdString> Arch::comb_pinmap = {
+        {id_A, id_F0}, // fastest input first
+        {id_B, id_E0},
+        {id_C, id_D},
+        {id_D, id_C},
+        {id_D0, id_C},
+        {id_D1, id_B},
+        {id_E, id_B},
+        {id_F, id_A},
+        {id_Q, id_COMBOUT},
+        {id_SO, id_COMBOUT},
+};
+
+namespace {
+// Returns the i-th LUT init value of a cell.
+//  - MISTRAL_NOT and MISTRAL_BUF cells have fixed init values (1 and 2 respectively).
+//  - MISTRAL_ALUT_ARITH cells read the LUT1 parameter when i == 1 and LUT0 otherwise.
+//  - Every other cell type reads the LUT parameter; i is ignored.
+// A cell without the relevant parameter yields 0.
+uint64_t get_lut_init(const CellInfo *cell, int i)
+{
+    if (cell->type == id_MISTRAL_NOT)
+        return 1;
+    if (cell->type == id_MISTRAL_BUF)
+        return 2;
+
+    // Select the parameter that holds the init for this cell type
+    IdString init_param;
+    if (cell->type == id_MISTRAL_ALUT_ARITH)
+        init_param = (i == 1) ? id_LUT1 : id_LUT0;
+    else
+        init_param = id_LUT;
+
+    auto entry = cell->params.find(init_param);
+    if (entry != cell->params.end())
+        return entry->second.as_int64();
+    return 0;
+}
+// Returns the state of a physical ALM input pin at a given bit position of the LUT mask.
+// Each pin corresponds to one bit of the position `bit`; which bit that is depends on the pin and, for some pins, on
+// whether the ALM is in 6-input mode (l6_mode), whether the LUT is arithmetic (arith_mode), and on which half of the
+// mask `bit` lies in. Asserts if `pin` is not one of A, B, C, D, E0, E1, F0 or F1.
+bool get_phys_pin_val(bool l6_mode, bool arith_mode, int bit, IdString pin)
+{
+    // Bit of the mask position that this pin selects
+    int shift = 0;
+    switch (pin.index) {
+    case ID_A:
+        shift = 0;
+        break;
+    case ID_B:
+        shift = 1;
+        break;
+    case ID_C:
+        // In 6-input mode C moves from bit 2 to bit 3 for the upper 32 positions
+        shift = (l6_mode && bit >= 32) ? 3 : 2;
+        break;
+    case ID_D:
+        // In 6-input mode D moves from bit 2 to bit 3 for the lower 32 positions
+        shift = (l6_mode && bit < 32) ? 3 : 2;
+        break;
+    case ID_E0:
+    case ID_E1:
+        shift = l6_mode ? 5 : 3;
+        break;
+    case ID_F0:
+    case ID_F1:
+        shift = arith_mode ? 3 : 4;
+        break;
+    default:
+        NPNR_ASSERT_FALSE("unknown physical pin!");
+    }
+    return ((bit >> shift) & 0x1) != 0;
+}
+} // namespace
+
+// Computes the 64-bit LUT mask of ALM `alm` in LAB `lab` from the cells bound to the ALM's two combinational bels.
+//
+// For every mask position, each bound LUT's init is evaluated at the logical input index obtained by mapping the
+// physical pin values at that position back through the cell's logical-to-physical pin assignment (taking constant
+// and inverted pin states into account). When not in 6-input mode, the second LUT fills the upper 32 bits of the
+// mask. The assembled mask is inverted before being returned.
+//
+// When the context's debug flag is set, the ALM's LUT cells, their input pin states/assignments and the final mask
+// are logged.
+uint64_t Arch::compute_lut_mask(uint32_t lab, uint8_t alm)
+{
+    uint64_t mask = 0;
+    auto &alm_data = labs.at(lab).alms.at(alm);
+    std::array<CellInfo *, 2> luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])};
+    // Number of mask positions evaluated per LUT
+    const int num_bits = alm_data.l6_mode ? 64 : 32;
+
+    for (int i = 0; i < 2; i++) {
+        CellInfo *lut = luts[i];
+        if (!lut)
+            continue;
+        // Outside 6-input mode the second LUT occupies the upper half of the mask
+        int offset = ((i == 1) && !alm_data.l6_mode) ? 32 : 0;
+        bool arith = lut->combInfo.is_carry;
+        for (int j = 0; j < num_bits; j++) {
+            // Evaluate LUT function at this point
+            // Arithmetic LUTs use their second init for positions 16 and above
+            uint64_t init = get_lut_init(lut, (arith && j >= 16) ? 1 : 0);
+            int index = 0;
+            for (int k = 0; k < lut->combInfo.lut_input_count; k++) {
+                IdString log_pin = get_lut_pin(lut, k);
+                int init_idx = k;
+                if (arith) {
+                    // D0 only affects lower half; D1 upper half
+                    if (k == 3 && j >= 16)
+                        continue;
+                    if (k == 4) {
+                        if (j < 16)
+                            continue;
+                        else
+                            init_idx = 3;
+                    }
+                }
+                CellPinState state = lut->get_pin_state(log_pin);
+                if (state == PIN_0)
+                    continue;
+                else if (state == PIN_1)
+                    index |= (1 << init_idx);
+                // Ignore if no associated physical pin
+                if (get_net_or_empty(lut, log_pin) == nullptr || lut->pin_data.at(log_pin).bel_pins.empty())
+                    continue;
+                // ALM inputs appear to be inverted by default (TODO: check!)
+                // so only invert if an inverter has _not_ been folded into the pin
+                bool inverted = (state != PIN_INV);
+                // Depermute physical pin
+                IdString phys_pin = lut->pin_data.at(log_pin).bel_pins.at(0);
+                if (get_phys_pin_val(alm_data.l6_mode, arith, j, phys_pin) != inverted)
+                    index |= (1 << init_idx);
+            }
+            if ((init >> index) & 0x1) {
+                mask |= (1ULL << uint64_t(j + offset));
+            }
+        }
+    }
+
+    // TODO: always inverted, or just certain paths?
+    mask = ~mask;
+
+#if 1
+    if (getCtx()->debug) {
+        auto pos = alm_data.lut_bels[0].pos;
+        log("ALM %03d.%03d.%d\n", CycloneV::pos2x(pos), CycloneV::pos2y(pos), alm);
+        for (int i = 0; i < 2; i++) {
+            log(" LUT%d: ", i);
+            if (luts[i]) {
+                log("%s:%s", nameOf(luts[i]), nameOf(luts[i]->type));
+                for (auto &pin : luts[i]->pin_data) {
+                    // Only input ports are of interest here
+                    if (!luts[i]->ports.count(pin.first) || luts[i]->ports.at(pin.first).type != PORT_IN)
+                        continue;
+                    log(" %s:", nameOf(pin.first));
+                    if (pin.second.state == PIN_0)
+                        log("0");
+                    else if (pin.second.state == PIN_1)
+                        log("1");
+                    else if (pin.second.state == PIN_INV)
+                        log("~");
+                    for (auto bp : pin.second.bel_pins)
+                        log("%s", nameOf(bp));
+                }
+            } else {
+                log("<null>");
+            }
+            log("\n");
+        }
+        // uint64_t is not 'unsigned long' on every platform, so cast explicitly to match the format specifier
+        log("INIT: %016llx\n", static_cast<unsigned long long>(mask));
+        log("\n");
+    }
+#endif
+
+    return mask;
+}
+
+NEXTPNR_NAMESPACE_END