From b88e86f3663178f9c257e930d8e125a353ee1bdb Mon Sep 17 00:00:00 2001 From: Lofty Date: Sun, 16 May 2021 08:19:08 +0100 Subject: mistral: Use MLABs as if they're LABs (for now) Signed-off-by: Lofty --- mistral/arch.cc | 5 ++++- mistral/arch.h | 8 +++++--- mistral/bitstream.cc | 50 ++++++++++++++++++++++++++------------------------ mistral/lab.cc | 50 ++++++++++++++++++++++++++++---------------------- 4 files changed, 63 insertions(+), 50 deletions(-) diff --git a/mistral/arch.cc b/mistral/arch.cc index ba7483e7..045625a3 100644 --- a/mistral/arch.cc +++ b/mistral/arch.cc @@ -71,7 +71,10 @@ Arch::Arch(ArchArgs args) for (CycloneV::block_type_t bel : cyclonev->pos_get_bels(pos)) { switch (bel) { case CycloneV::block_type_t::LAB: - create_lab(x, y); + create_lab(x, y, /*is_mlab=*/false); + break; + case CycloneV::block_type_t::MLAB: + create_lab(x, y, /*is_mlab=*/true); break; default: continue; diff --git a/mistral/arch.h b/mistral/arch.h index c13c4c8a..a808f69d 100644 --- a/mistral/arch.h +++ b/mistral/arch.h @@ -58,6 +58,8 @@ struct ALMInfo struct LABInfo { + // LAB or MLAB? + bool is_mlab; std::array alms; // Control set wires std::array clk_wires, ena_wires; @@ -457,9 +459,9 @@ struct Arch : BaseArch return WireId(cyclonev->pnode_to_rnode(CycloneV::pnode(bt, x, y, port, bi, pi))); } - void create_lab(int x, int y); // lab.cc - void create_gpio(int x, int y); // io.cc - void create_clkbuf(int x, int y); // globals.cc + void create_lab(int x, int y, bool is_mlab); // lab.cc + void create_gpio(int x, int y); // io.cc + void create_clkbuf(int x, int y); // globals.cc // ------------------------------------------------- diff --git a/mistral/bitstream.cc b/mistral/bitstream.cc index 0e8b9c85..665005e9 100644 --- a/mistral/bitstream.cc +++ b/mistral/bitstream.cc @@ -214,6 +214,7 @@ struct MistralBitgen bool write_alm(uint32_t lab, uint8_t alm) { auto &alm_data = ctx->labs.at(lab).alms.at(alm); + auto block_type = ctx->labs.at(lab).is_mlab ? CycloneV::MLAB : CycloneV::LAB; std::array luts{ctx->getBoundBelCell(alm_data.lut_bels[0]), ctx->getBoundBelCell(alm_data.lut_bels[1])}; @@ -227,28 +228,28 @@ struct MistralBitgen auto pos = alm_data.lut_bels[0].pos; // Combinational mode - TODO: flop feedback - cv->bmux_m_set(CycloneV::LAB, pos, CycloneV::MODE, alm, alm_data.l6_mode ? CycloneV::L6 : CycloneV::L5); + cv->bmux_m_set(block_type, pos, CycloneV::MODE, alm, alm_data.l6_mode ? CycloneV::L6 : CycloneV::L5); // LUT function - cv->bmux_r_set(CycloneV::LAB, pos, CycloneV::LUT_MASK, alm, ctx->compute_lut_mask(lab, alm)); + cv->bmux_r_set(block_type, pos, CycloneV::LUT_MASK, alm, ctx->compute_lut_mask(lab, alm)); // DFF/LUT output selection const std::array mux_settings{CycloneV::TDFF0, CycloneV::TDFF1, CycloneV::TDFF1L, CycloneV::BDFF0, CycloneV::BDFF1, CycloneV::BDFF1L}; const std::array mux_port{CycloneV::FFT0, CycloneV::FFT1, CycloneV::FFT1L, CycloneV::FFB0, CycloneV::FFB1, CycloneV::FFB1L}; for (int i = 0; i < 6; i++) { - if (ctx->wires_connected(alm_data.comb_out[i / 3], ctx->get_port(CycloneV::LAB, CycloneV::pos2x(pos), + if (ctx->wires_connected(alm_data.comb_out[i / 3], ctx->get_port(block_type, CycloneV::pos2x(pos), CycloneV::pos2y(pos), alm, mux_port[i]))) - cv->bmux_m_set(CycloneV::LAB, pos, mux_settings[i], alm, CycloneV::NLUT); + cv->bmux_m_set(block_type, pos, mux_settings[i], alm, CycloneV::NLUT); } bool is_carry = (luts[0] && luts[0]->combInfo.is_carry) || (luts[1] && luts[1]->combInfo.is_carry); if (is_carry) - cv->bmux_m_set(CycloneV::LAB, pos, CycloneV::ARITH_SEL, alm, CycloneV::ADDER); + cv->bmux_m_set(block_type, pos, CycloneV::ARITH_SEL, alm, CycloneV::ADDER); // The carry in/out enable bits if (is_carry && alm == 0 && !luts[0]->combInfo.carry_start) - cv->bmux_b_set(CycloneV::LAB, pos, CycloneV::TTO_DIS, 0, true); + cv->bmux_b_set(block_type, pos, CycloneV::TTO_DIS, 0, true); if (is_carry && alm == 5) - cv->bmux_b_set(CycloneV::LAB, pos, CycloneV::BTO_DIS, 0, true); + cv->bmux_b_set(block_type, pos, CycloneV::BTO_DIS, 0, true); // Flipflop configuration const std::array ef_sel{CycloneV::TEF_SEL, CycloneV::BEF_SEL}; // This isn't a typo; the *PKREG* bits really are mirrored. @@ -269,7 +270,7 @@ struct MistralBitgen for (int i = 0; i < 2; i++) { // EF selection mux if (ctx->wires_connected(ctx->getBelPinWire(alm_data.lut_bels[i], i ? id_F0 : id_F1), alm_data.sel_ef[i])) - cv->bmux_m_set(CycloneV::LAB, pos, ef_sel[i], alm, CycloneV::bmux_type_t::F); + cv->bmux_m_set(block_type, pos, ef_sel[i], alm, CycloneV::bmux_type_t::F); } for (int i = 0; i < 4; i++) { @@ -278,35 +279,35 @@ struct MistralBitgen continue; // PKREG (input selection) if (ctx->wires_connected(alm_data.sel_ef[i / 2], alm_data.ff_in[i])) - cv->bmux_b_set(CycloneV::LAB, pos, pkreg[i], alm, true); + cv->bmux_b_set(block_type, pos, pkreg[i], alm, true); // Control set // CLK+ENA int ce_idx = alm_data.clk_ena_idx[i / 2]; - cv->bmux_m_set(CycloneV::LAB, pos, clk_sel[i / 2], alm, clk_choice[ce_idx]); + cv->bmux_m_set(block_type, pos, clk_sel[i / 2], alm, clk_choice[ce_idx]); if (ff->ffInfo.ctrlset.clk.inverted) - cv->bmux_b_set(CycloneV::LAB, pos, clk_inv[ce_idx], 0, true); + cv->bmux_b_set(block_type, pos, clk_inv[ce_idx], 0, true); if (get_net_or_empty(ff, id_ENA) != nullptr) { // not using ffInfo.ctrlset, this has a fake net always to // ensure different constants don't collide - cv->bmux_b_set(CycloneV::LAB, pos, en_en[ce_idx], 0, true); - cv->bmux_b_set(CycloneV::LAB, pos, en_ninv[ce_idx], 0, ff->ffInfo.ctrlset.ena.inverted); + cv->bmux_b_set(block_type, pos, en_en[ce_idx], 0, true); + cv->bmux_b_set(block_type, pos, en_ninv[ce_idx], 0, ff->ffInfo.ctrlset.ena.inverted); } else { - cv->bmux_b_set(CycloneV::LAB, pos, en_en[ce_idx], 0, false); + cv->bmux_b_set(block_type, pos, en_en[ce_idx], 0, false); } // ACLR int aclr_idx = alm_data.aclr_idx[i / 2]; - cv->bmux_b_set(CycloneV::LAB, pos, clr_sel[i / 2], alm, aclr_idx == 1); + cv->bmux_b_set(block_type, pos, clr_sel[i / 2], alm, aclr_idx == 1); if (ff->ffInfo.ctrlset.aclr.inverted) - cv->bmux_b_set(CycloneV::LAB, pos, aclr_inv[aclr_idx], 0, true); + cv->bmux_b_set(block_type, pos, aclr_inv[aclr_idx], 0, true); // SCLR if (ff->ffInfo.ctrlset.sclr.net != nullptr) { - cv->bmux_b_set(CycloneV::LAB, pos, CycloneV::SCLR_INV, 0, ff->ffInfo.ctrlset.sclr.inverted); + cv->bmux_b_set(block_type, pos, CycloneV::SCLR_INV, 0, ff->ffInfo.ctrlset.sclr.inverted); } else { - cv->bmux_b_set(CycloneV::LAB, pos, sclr_dis[i / 2], alm, true); + cv->bmux_b_set(block_type, pos, sclr_dis[i / 2], alm, true); } // SLOAD if (ff->ffInfo.ctrlset.sload.net != nullptr) { - cv->bmux_b_set(CycloneV::LAB, pos, sload_en[i / 2], alm, true); - cv->bmux_b_set(CycloneV::LAB, pos, CycloneV::SLOAD_INV, 0, ff->ffInfo.ctrlset.sload.inverted); + cv->bmux_b_set(block_type, pos, sload_en[i / 2], alm, true); + cv->bmux_b_set(block_type, pos, CycloneV::SLOAD_INV, 0, ff->ffInfo.ctrlset.sload.inverted); } } return true; @@ -316,21 +317,22 @@ struct MistralBitgen { auto &lab_data = ctx->labs.at(lab); auto pos = lab_data.alms.at(0).lut_bels[0].pos; + auto block_type = ctx->labs.at(lab).is_mlab ? CycloneV::MLAB : CycloneV::LAB; const std::array aclr_inp{CycloneV::ACLR0_SEL, CycloneV::ACLR1_SEL}; for (int i = 0; i < 2; i++) { // Quartus seems to set unused ACLRs to CLKI2... if (!lab_data.aclr_used[i]) - cv->bmux_m_set(CycloneV::LAB, pos, aclr_inp[i], 0, CycloneV::CLKI2); + cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, CycloneV::CLKI2); else - cv->bmux_m_set(CycloneV::LAB, pos, aclr_inp[i], 0, (i == 1) ? CycloneV::GIN0 : CycloneV::GIN1); + cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, (i == 1) ? CycloneV::GIN0 : CycloneV::GIN1); } for (int i = 0; i < 3; i++) { // Check for fabric->clock routing - if (ctx->wires_connected(ctx->get_port(CycloneV::LAB, CycloneV::pos2x(pos), CycloneV::pos2y(pos), -1, + if (ctx->wires_connected(ctx->get_port(block_type, CycloneV::pos2x(pos), CycloneV::pos2y(pos), -1, CycloneV::DATAIN, 0), lab_data.clk_wires[i])) - cv->bmux_m_set(CycloneV::LAB, pos, CycloneV::CLKA_SEL, 0, CycloneV::GIN2); + cv->bmux_m_set(block_type, pos, CycloneV::CLKA_SEL, 0, CycloneV::GIN2); } } diff --git a/mistral/lab.cc b/mistral/lab.cc index 56bc604a..2ef22412 100644 --- a/mistral/lab.cc +++ b/mistral/lab.cc @@ -33,6 +33,7 @@ static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx) { auto &lab = arch->labs.at(lab_idx); auto &alm = lab.alms.at(z); + auto block_type = lab.is_mlab ? CycloneV::MLAB : CycloneV::LAB; // Create the combinational part of ALMs. // There are two of these, for the two LUT outputs, and these also contain the carry chain and associated logic // Each one has all 8 ALM inputs as input pins. In many cases only a subset of these are used; depending on mode; @@ -66,14 +67,14 @@ static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx) BelId bel = arch->add_bel(x, y, arch->id(stringf("ALM%d_COMB%d", z, i)), id_MISTRAL_COMB); // LUT/MUX inputs - arch->add_bel_pin(bel, id_A, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::A)); - arch->add_bel_pin(bel, id_B, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::B)); - arch->add_bel_pin(bel, id_C, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::C)); - arch->add_bel_pin(bel, id_D, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::D)); - arch->add_bel_pin(bel, id_E0, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::E0)); - arch->add_bel_pin(bel, id_E1, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::E1)); - arch->add_bel_pin(bel, id_F0, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::F0)); - arch->add_bel_pin(bel, id_F1, PORT_IN, arch->get_port(CycloneV::LAB, x, y, z, CycloneV::F1)); + arch->add_bel_pin(bel, id_A, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::A)); + arch->add_bel_pin(bel, id_B, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::B)); + arch->add_bel_pin(bel, id_C, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::C)); + arch->add_bel_pin(bel, id_D, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::D)); + arch->add_bel_pin(bel, id_E0, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::E0)); + arch->add_bel_pin(bel, id_E1, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::E1)); + arch->add_bel_pin(bel, id_F0, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::F0)); + arch->add_bel_pin(bel, id_F1, PORT_IN, arch->get_port(block_type, x, y, z, CycloneV::F1)); // Carry/share chain arch->add_bel_pin(bel, id_CI, PORT_IN, carry_in); arch->add_bel_pin(bel, id_SHAREIN, PORT_IN, share_in); @@ -105,8 +106,8 @@ static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx) } // E/F pips // Note that the F choice is mirrored, F from the other half is picked - arch->add_pip(arch->get_port(CycloneV::LAB, x, y, z, i ? CycloneV::E1 : CycloneV::E0), alm.sel_ef[i]); - arch->add_pip(arch->get_port(CycloneV::LAB, x, y, z, i ? CycloneV::F0 : CycloneV::F1), alm.sel_ef[i]); + arch->add_pip(arch->get_port(block_type, x, y, z, i ? CycloneV::E1 : CycloneV::E0), alm.sel_ef[i]); + arch->add_pip(arch->get_port(block_type, x, y, z, i ? CycloneV::F0 : CycloneV::F1), alm.sel_ef[i]); } // Create the flipflops and associated routing @@ -132,12 +133,12 @@ static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx) alm.ff_out[i] = arch->add_wire(x, y, arch->id(stringf("FFOUT[%d]", (z * 4) + i))); arch->add_bel_pin(bel, id_Q, PORT_OUT, alm.ff_out[i]); // Output mux (*DFF*) - WireId out = arch->get_port(CycloneV::LAB, x, y, z, outputs[i]); + WireId out = arch->get_port(block_type, x, y, z, outputs[i]); arch->add_pip(alm.ff_out[i], out); arch->add_pip(alm.comb_out[i / 2], out); // 'L' output mux where applicable if (i == 1 || i == 3) { - WireId l_out = arch->get_port(CycloneV::LAB, x, y, z, l_outputs[i / 2]); + WireId l_out = arch->get_port(block_type, x, y, z, l_outputs[i / 2]); arch->add_pip(alm.ff_out[i], l_out); arch->add_pip(alm.comb_out[i / 2], l_out); } @@ -148,16 +149,21 @@ static void create_alm(Arch *arch, int x, int y, int z, uint32_t lab_idx) b.lab_data.alm = z; b.lab_data.idx = i; } + + // TODO: MLAB-specific pins } } // namespace -void Arch::create_lab(int x, int y) +void Arch::create_lab(int x, int y, bool is_mlab) { uint32_t lab_idx = labs.size(); labs.emplace_back(); auto &lab = labs.back(); + lab.is_mlab = is_mlab; + auto block_type = is_mlab ? CycloneV::MLAB : CycloneV::LAB; + // Create common control set configuration. This is actually a subset of what's possible, but errs on the side of // caution due to incomplete documentation @@ -165,24 +171,24 @@ void Arch::create_lab(int x, int y) // permutations for (int i = 0; i < 3; i++) { lab.clk_wires[i] = add_wire(x, y, id(stringf("CLK%d", i))); - add_pip(get_port(CycloneV::LAB, x, y, -1, CycloneV::CLKIN, 0), lab.clk_wires[i]); // dedicated routing - add_pip(get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 0), lab.clk_wires[i]); // general routing + add_pip(get_port(block_type, x, y, -1, CycloneV::CLKIN, 0), lab.clk_wires[i]); // dedicated routing + add_pip(get_port(block_type, x, y, -1, CycloneV::DATAIN, 0), lab.clk_wires[i]); // general routing } // Enables - while it looks from the config like there are choices for these, it seems like EN0_SEL actually selects // SCLR not ENA0 and EN1_SEL actually selects SLOAD? - lab.ena_wires[0] = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 2); - lab.ena_wires[1] = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 3); - lab.ena_wires[2] = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 0); + lab.ena_wires[0] = get_port(block_type, x, y, -1, CycloneV::DATAIN, 2); + lab.ena_wires[1] = get_port(block_type, x, y, -1, CycloneV::DATAIN, 3); + lab.ena_wires[2] = get_port(block_type, x, y, -1, CycloneV::DATAIN, 0); // ACLRs - only consider general routing for now - lab.aclr_wires[0] = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 3); - lab.aclr_wires[1] = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 2); + lab.aclr_wires[0] = get_port(block_type, x, y, -1, CycloneV::DATAIN, 3); + lab.aclr_wires[1] = get_port(block_type, x, y, -1, CycloneV::DATAIN, 2); // SCLR and SLOAD - as above it seems like these might be selectable using the "EN*_SEL" bits but play it safe for // now - lab.sclr_wire = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 3); - lab.sload_wire = get_port(CycloneV::LAB, x, y, -1, CycloneV::DATAIN, 1); + lab.sclr_wire = get_port(block_type, x, y, -1, CycloneV::DATAIN, 3); + lab.sload_wire = get_port(block_type, x, y, -1, CycloneV::DATAIN, 1); for (int i = 0; i < 10; i++) { create_alm(this, x, y, i, lab_idx); -- cgit v1.2.3 From 0367719eea074bf4043c4baf3782c772d8c101ae Mon Sep 17 00:00:00 2001 From: gatecat Date: Tue, 24 Aug 2021 13:58:18 +0100 Subject: mistral: Permute MLAB init bits correctly --- mistral/lab.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mistral/lab.cc b/mistral/lab.cc index 2ef22412..f386aa00 100644 --- a/mistral/lab.cc +++ b/mistral/lab.cc @@ -881,6 +881,24 @@ bool get_phys_pin_val(bool l6_mode, bool arith_mode, int bit, IdString pin) NPNR_ASSERT_FALSE("unknown physical pin!"); } } + +static const std::array mlab_permute = {0, 1, 4, 5, 8, 9, 12, 13, 29, 28, 25, 24, 21, 20, 17, 16, + 2, 3, 6, 7, 10, 11, 14, 15, 31, 30, 27, 26, 23, 22, 19, 18, + 32, 33, 36, 37, 40, 41, 44, 45, 61, 60, 57, 56, 53, 52, 49, 48, + 34, 35, 38, 39, 42, 43, 46, 47, 63, 62, 59, 58, 55, 54, 51, 50}; + +// MLABs have permuted init values in hardware, we need to correct for this +uint64_t permute_mlab_init(uint64_t orig) +{ + uint64_t result = 0; + for (int i = 0; i < 64; i++) { + if ((orig >> uint64_t(i)) & 0x1) { + result |= (uint64_t(1) << uint64_t(mlab_permute.at(i))); + } + } + return result; +} + } // namespace uint64_t Arch::compute_lut_mask(uint32_t lab, uint8_t alm) @@ -898,6 +916,7 @@ uint64_t Arch::compute_lut_mask(uint32_t lab, uint8_t alm) for (int j = 0; j < (alm_data.l6_mode ? 64 : 32); j++) { // Evaluate LUT function at this point uint64_t init = get_lut_init(lut, (arith && j >= 16) ? 1 : 0); + int index = 0; for (int k = 0; k < lut->combInfo.lut_input_count; k++) { IdString log_pin = get_lut_pin(lut, k); @@ -938,6 +957,9 @@ uint64_t Arch::compute_lut_mask(uint32_t lab, uint8_t alm) // TODO: always inverted, or just certain paths? mask = ~mask; + if (labs.at(lab).is_mlab) + mask = permute_mlab_init(mask); + #if 1 if (getCtx()->debug) { auto pos = alm_data.lut_bels[0].pos; -- cgit v1.2.3