diff options
-rw-r--r-- | .cirrus/Dockerfile.ubuntu20.04 | 2 | ||||
-rw-r--r-- | .github/workflows/mistral_ci.yml | 2 | ||||
-rw-r--r-- | common/parallel_refine.cc | 2 | ||||
-rw-r--r-- | docs/generic.md | 8 | ||||
-rw-r--r-- | docs/viaduct.md | 131 | ||||
-rw-r--r-- | gowin/arch.cc | 64 | ||||
-rw-r--r-- | gowin/arch.h | 3 | ||||
-rw-r--r-- | gowin/cells.cc | 4 | ||||
-rw-r--r-- | gowin/constids.inc | 12 | ||||
-rw-r--r-- | gowin/pack.cc | 22 | ||||
-rw-r--r-- | ice40/arch.cc | 29 | ||||
-rw-r--r-- | ice40/archdefs.h | 1 | ||||
-rw-r--r-- | ice40/pack.cc | 102 | ||||
-rw-r--r-- | mistral/bitstream.cc | 30 |
14 files changed, 375 insertions, 37 deletions
diff --git a/.cirrus/Dockerfile.ubuntu20.04 b/.cirrus/Dockerfile.ubuntu20.04 index d1e7b47b..6561ed59 100644 --- a/.cirrus/Dockerfile.ubuntu20.04 +++ b/.cirrus/Dockerfile.ubuntu20.04 @@ -29,7 +29,7 @@ RUN set -e -x ;\ cd /usr/local/src ;\ git clone --recursive https://github.com/YosysHQ/icestorm.git ;\ cd icestorm ;\ - git reset --hard 4bc68c9620e6be20f8fe10d20f84681d80beac23 ;\ + git reset --hard 9f66f9ce16941c6417813cb87653c735a78b53ae ;\ make -j $(nproc) ;\ make install diff --git a/.github/workflows/mistral_ci.yml b/.github/workflows/mistral_ci.yml index a02026cf..b0bbfb52 100644 --- a/.github/workflows/mistral_ci.yml +++ b/.github/workflows/mistral_ci.yml @@ -21,7 +21,7 @@ jobs: - name: Execute build nextpnr env: MISTRAL_PATH: ${{ github.workspace }}/deps/mistral - MISTRAL_REVISION: 0c2ab2b2c6af33fea1c20349be2e0068366ed615 + MISTRAL_REVISION: ebfc0dd2cc7d6d2159b641a397c88554840e93c9 run: | source ./.github/ci/build_mistral.sh get_dependencies diff --git a/common/parallel_refine.cc b/common/parallel_refine.cc index bc665cd3..a868ca58 100644 --- a/common/parallel_refine.cc +++ b/common/parallel_refine.cc @@ -546,6 +546,7 @@ struct ThreadState bool accept_move() { + static constexpr double epsilon = 1e-20; double delta = g.cfg.lambda * (timing_delta / std::max<double>(epsilon, g.total_timing_cost)) + (1.0 - g.cfg.lambda) * (double(wirelen_delta) / std::max<double>(epsilon, g.total_wirelen)); return delta < 0 || @@ -565,7 +566,6 @@ struct ThreadState return true; } - static constexpr double epsilon = 1e-20; bool single_cell_swap(CellInfo *cell, BelId new_bel) { NPNR_ASSERT(moved_cells.empty()); diff --git a/docs/generic.md b/docs/generic.md index 96db872e..108d41d7 100644 --- a/docs/generic.md +++ b/docs/generic.md @@ -1,8 +1,12 @@ # nextpnr Generic Architecture -Instead of implementing the [C++ API](archapi.md), you can programmatically +Instead of implementing the full [C++ API](archapi.md), you can programmatically build up a description of an FPGA using the 
generic architecture and the -Python API. +Python API, or the [Viaduct C++ API](viaduct.md) (described further in its own +document). + +The Viaduct API allows more complex constraints to be implemented and has shorter +startup times than using the Python API. A basic packer is provided that supports LUTs, flipflops and IO buffer insertion. Packing could also be implemented using the Python API. diff --git a/docs/viaduct.md b/docs/viaduct.md new file mode 100644 index 00000000..a59310f1 --- /dev/null +++ b/docs/viaduct.md @@ -0,0 +1,131 @@ +# Viaduct - a series of small arches + +Viaduct is a C++-based successor to the Python generic API that gets most of the benefits of a full-custom nextpnr architecture, with the simplicity of a harness to build from and a predefined flat set of data structures for the placement and routing resources. + +Like the Python generic API, the routing graph can be built programmatically, or loaded from an external data source at startup. However, the Viaduct framework provides considerably improved startup times by relying less on strings and eliminating the C++/Python boundary; and also enables more complex architectures to be modeled with arbitrary place-and-route time constraints implemented as code, in the spirit of nextpnr. + +A Viaduct implementation is called a 'uarch' (microarch), because it's smaller than a full architecture. + +Viaduct implementations, including some examples, are located as subfolders of `generic/viaduct/`. + +## Viaduct API Reference + +### Initialisation + +A Viaduct uarch must override `ViaductAPI` - see `generic/viaduct_api.h`. This contains virtual methods to be optionally overridden, in most cases only a small number of these need be. + +```c++ +void init(Context *ctx) +``` + +This should perform device resources initialisation. uarches should always call the superclass `ViaductAPI::init(ctx)` first, too. 
+ +Bels (placement locations), wires ('metal' interconnect) and pips (programmable switches that connect wires) can be dynamically created by calling the methods of `Context` described in the [generic arch docs](coding.md) - the most important methods to start with are: + +```c++ +ctx->addWire(IdStringList name, IdString type, int x, int y); +ctx->addPip(IdStringList name, IdString type, WireId srcWire, WireId dstWire, float delay, Loc loc); +ctx->addBel(IdStringList name, IdString type, Loc loc, bool gb, bool hidden); +ctx->addBelInput(BelId bel, IdString name, WireId wire); +ctx->addBelOutput(BelId bel, IdString name, WireId wire); +ctx->addBelInout(BelId bel, IdString name, WireId wire); +``` + +### Helpers + +nextpnr uses an indexed, interned string type for performance, and object names (for bels, wires and pips) are based on lists of these. To build these efficiently, you can add a `ViaductHelpers` instance to your uarch, call `init(ctx)` on it, and then use the `xy_id(x, y, base)` member functions of this. For example: + +```c++ +ViaductHelpers h; +h.init(ctx); +ctx->addWire(h.xy_id(13, 45, ctx->id("CLK0")), ctx->id("CLK"), 13, 45); +``` + +This creates a wire named `X13/Y45/CLK0`. + +### Constant IDs + +In some cases, such as during packing and validity checks, `IdString`s for strings such as common port names will be needed a large number of times. To avoid the string hash and compare associated with `ctx->id("string")`, you can use the "constids" support. To use this: + + - create a 'constids.inc' file in your uarch folder containing one ID per line, each inside X( ). For example: +``` +X(LUT4) +X(DFF) +X(CLK) +X(D) +X(F) +``` + - set the `VIADUCT_CONSTIDS` macro to the path to this file relative to the generic arch base + - in the same file where `init` is implemented, also define the `GEN_INIT_CONSTIDS` macro before the `viaduct_constids.h` include to create `init_uarch_constids`, which you should call in your `init` implementation. 
+ - in any file you need the constant `IdString`s, include `viaduct_constids.h` and the ids will be accessible as constants named like `id_LUT4`. + +### Constraints + +```c++ +bool checkBelAvail(BelId bel) const; +bool isValidBelForCellType(IdString cell_type, BelId bel) const; +bool isBelLocationValid(BelId bel) const; +bool checkWireAvail(WireId wire) const; +bool checkPipAvail(PipId pip) const; +bool checkPipAvailForNet(PipId pip, NetInfo *net) const; +``` + +These can be overridden, if needed, to implement nextpnr's system of arbitrary, architecture-defined constraints on legal placements and the availability of placement and routing resources. These could be used to implement placement rules inside tiles (like clocks that are shared between flipflops); or disable one routing resource when a conflicting one is used. They only need to be overridden, and return false, where a resource is unavailable due to a specific, custom constraint and not just because that resource itself is occupied. + +For more information on terminology, see [FAQ](faq.md); for references of these functions see the [Arch API](archapi.md) docs; and for some general hints see the [Coding Tips](coding.md). + +uarches may update internal, constraint-related structures based on placement and routing updates by optionally overriding the 'hook' functions called whenever bindings are changed. + +```c++ +void notifyBelChange(BelId bel, CellInfo *cell); +void notifyWireChange(WireId wire, NetInfo *net); +void notifyPipChange(PipId pip, NetInfo *net); +``` + +These will be called with `cell` or `net` pointing to the object the resource is being bound to for a bind; or `nullptr` for an unbind. + +### Packing + +Although arches can implement as much or as little packing as they like, nextpnr leans towards doing minimal pre-placement packing and leaving the combination of LUTs and flipflops into tiles, and similar tasks, down to placement validity checks (`checkBelAvail`). 
+ +Any packing tasks that do need to be done, for example cleaning up top level IO pairing cells that should always stay together using relative constraints, should be done by overriding the `pack` method: + +```c++ +void pack(); +``` + + +There are also hooks to perform custom transformations or steps in-between and after placement and routing: + +```c++ +void prePlace(); +void postPlace(); +void preRoute(); +void postRoute(); +``` + +The most common use for this would be to implement a custom bitstream generation step (or similar intermediate format) inside `postRoute` on the final design. Another example use case would be to implement a custom global clock routing pass in `preRoute`. + +### ViaductArch + +As well as creating the uarch class that derives from `ViaductAPI`, you also need to create a factory for it by creating a singleton of a class that derives from `ViaductArch`. This should, in its constructor, construct `ViaductArch` with the arch name, and also implement the `create` function to return a new instance of your `ViaductAPI` implementation. For example: + +```c++ +struct ExampleArch : ViaductArch +{ + ExampleArch() : ViaductArch("example"){}; + std::unique_ptr<ViaductAPI> create(const dict<std::string, std::string> &args) + { + return std::make_unique<ExampleImpl>(); + } +} exampleArch; +``` + +### Adding a new uarch + +The reference above provides an overview of what a Viaduct uarch must implement; it's also recommended to look at the `generic` and `okami` examples in `generic/viaduct`. New uarches should have their source contained in subfolders of `generic/viaduct`, and be added to the `VIADUCT_UARCHES` list in `generic/family.cmake`. + +Once you've implemented `ViaductAPI` and created the `ViaductArch` singleton, you should be able to run nextpnr with the arch by running `nextpnr-generic --uarch <name>`. 
+ + + diff --git a/gowin/arch.cc b/gowin/arch.cc index 6213124f..2c1e50b7 100644 --- a/gowin/arch.cc +++ b/gowin/arch.cc @@ -440,7 +440,9 @@ IdString Arch::wireToGlobal(int &row, int &col, const DatabasePOD *db, IdString { const std::string &wirename = wire.str(this); char buf[32]; - if (wirename == "VCC" || wirename == "GND") { + if (wirename == "VCC" || wirename == "VSS") { + row = 0; + col = 0; return wire; } if (!isdigit(wirename[1]) || !isdigit(wirename[2]) || !isdigit(wirename[3])) { @@ -949,6 +951,13 @@ Arch::Arch(ArchArgs args) : args(args) package_name.c_str(this), speed_id.c_str(this)); // setup db + // add global VCC and GND bels + addBel(id_GND, id_GND, Loc(0, 0, BelZ::gnd_0_z), true); + addWire(id_VSS, id_VSS, 0, 0); + addBelOutput(id_GND, id_G, id_VSS); + addBel(id_VCC, id_VCC, Loc(0, 0, BelZ::vcc_0_z), true); + addWire(id_VCC, id_VCC, 0, 0); + addBelOutput(id_VCC, id_V, id_VCC); char buf[32]; // The reverse order of the enumeration simplifies the creation // of MUX2_LUT8s: they need the existence of the wire on the right. 
@@ -1000,6 +1009,44 @@ Arch::Arch(ArchArgs args) : args(args) snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); addBelInput(belname, id_GSRI, id(buf)); break; + case ID_OSC: + snprintf(buf, 32, "R%dC%d_OSC", row + 1, col + 1); + belname = id(buf); + addBel(belname, id_OSC, Loc(col, row, 0), false); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCOUT)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelOutput(belname, id_OSCOUT, id(buf)); + break; + case ID_OSCH: + snprintf(buf, 32, "R%dC%d_OSCH", row + 1, col + 1); + belname = id(buf); + addBel(belname, id_OSCH, Loc(col, row, 0), false); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCOUT)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelOutput(belname, id_OSCOUT, id(buf)); + break; + case ID_OSCF: + snprintf(buf, 32, "R%dC%d_OSCF", row + 1, col + 1); + belname = id(buf); + addBel(belname, id_OSCF, Loc(col, row, 0), false); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCOUT)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelOutput(belname, id_OSCOUT, id(buf)); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCEN)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelInput(belname, id_OSCEN, id(buf)); + break; + case ID_OSCZ: + snprintf(buf, 32, "R%dC%d_OSCZ", row + 1, col + 1); + belname = id(buf); + addBel(belname, id_OSCZ, Loc(col, row, 0), false); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCOUT)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelOutput(belname, id_OSCOUT, id(buf)); + portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_OSCEN)->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelInput(belname, 
id_OSCEN, id(buf)); + break; // fall through the ++ case ID_LUT7: z++; @@ -1149,6 +1196,21 @@ Arch::Arch(ArchArgs args) : args(args) snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); addBelInput(belname, id_CLK, id(buf)); + const PairPOD *xxx_port = pairLookup(bel->ports.get(), bel->num_ports, ID_XXX_VSS); + if (xxx_port != nullptr) { + ddr_has_extra_inputs = true; + portname = IdString(xxx_port->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelInput(belname, id_XXX_VSS, id(buf)); + } + xxx_port = pairLookup(bel->ports.get(), bel->num_ports, ID_XXX_VCC); + if (xxx_port != nullptr) { + ddr_has_extra_inputs = true; + portname = IdString(xxx_port->src_id); + snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); + addBelInput(belname, id_XXX_VCC, id(buf)); + } + if (oddrc) { portname = IdString(pairLookup(bel->ports.get(), bel->num_ports, ID_CE)->src_id); snprintf(buf, 32, "R%dC%d_%s", row + 1, col + 1, portname.c_str(this)); diff --git a/gowin/arch.h b/gowin/arch.h index c8392e7e..bc29a59b 100644 --- a/gowin/arch.h +++ b/gowin/arch.h @@ -470,6 +470,9 @@ struct Arch : BaseArch<ArchRanges> void updateClockSpinesCache(IdString spine_id, IdString wire_id); void fixClockSpineDecals(void); + // XXX GW1N-9C DDR quirk + bool ddr_has_extra_inputs = false; + // Permissible combinations of modes in a single slice std::map<const IdString, IdString> dff_comp_mode; }; diff --git a/gowin/cells.cc b/gowin/cells.cc index 8e450b51..c3b21782 100644 --- a/gowin/cells.cc +++ b/gowin/cells.cc @@ -65,6 +65,10 @@ std::unique_ptr<CellInfo> create_generic_cell(Context *ctx, IdString type, std:: new_cell->addOutput(id_O); } else if (type == id_GSR) { new_cell->addInput(id_GSRI); + } else if (type == id_GND) { + new_cell->addOutput(id_G); + } else if (type == id_VCC) { + new_cell->addOutput(id_V); } else { log_error("unable to create generic cell of type %s\n", type.c_str(ctx)); } diff --git a/gowin/constids.inc 
b/gowin/constids.inc index 125fdc74..d2a6b171 100644 --- a/gowin/constids.inc +++ b/gowin/constids.inc @@ -681,6 +681,8 @@ X(IOBJS) // IOLOGIC X(TX) +X(XXX_VSS) +X(XXX_VCC) X(OBUF_TYPE) X(SBUF) X(DBUF) @@ -756,6 +758,12 @@ X(GSR) X(GSR0) X(GSRI) +// Oscillators +X(OSC) +X(OSCZ) +X(OSCH) +X(OSCF) + // primitive attributes X(INIT) X(FF_USED) @@ -787,6 +795,10 @@ X(SUM) X(CIN) X(COUT) X(OF) +X(V) +X(G) +X(OSCOUT) +X(OSCEN) // timing X(X0) diff --git a/gowin/pack.cc b/gowin/pack.cc index 9f0a2478..28370a75 100644 --- a/gowin/pack.cc +++ b/gowin/pack.cc @@ -611,20 +611,17 @@ static void pack_constants(Context *ctx) { log_info("Packing constants..\n"); - std::unique_ptr<CellInfo> gnd_cell = create_generic_cell(ctx, id_SLICE, "$PACKER_GND"); - gnd_cell->params[id_INIT] = Property(0, 1 << 4); + std::unique_ptr<CellInfo> gnd_cell = create_generic_cell(ctx, id_GND, "$PACKER_GND"); auto gnd_net = std::make_unique<NetInfo>(ctx->id("$PACKER_GND_NET")); gnd_net->driver.cell = gnd_cell.get(); - gnd_net->driver.port = id_F; - gnd_cell->ports.at(id_F).net = gnd_net.get(); + gnd_net->driver.port = id_G; + gnd_cell->ports.at(id_G).net = gnd_net.get(); - std::unique_ptr<CellInfo> vcc_cell = create_generic_cell(ctx, id_SLICE, "$PACKER_VCC"); - // Fill with 1s - vcc_cell->params[id_INIT] = Property(Property::S1).extract(0, (1 << 4), Property::S1); + std::unique_ptr<CellInfo> vcc_cell = create_generic_cell(ctx, id_VCC, "$PACKER_VCC"); auto vcc_net = std::make_unique<NetInfo>(ctx->id("$PACKER_VCC_NET")); vcc_net->driver.cell = vcc_cell.get(); - vcc_net->driver.port = id_F; - vcc_cell->ports.at(id_F).net = vcc_net.get(); + vcc_net->driver.port = id_V; + vcc_cell->ports.at(id_V).net = vcc_net.get(); std::vector<IdString> dead_nets; @@ -801,6 +798,13 @@ static void pack_iologic(Context *ctx) ci->attrs[id_IOBUF] = 1; } } + // if have XXX_ inputs connect them + if (ctx->ddr_has_extra_inputs) { + ci->addInput(id_XXX_VSS); + ci->connectPort(id_XXX_VSS, 
ctx->nets[ctx->id("$PACKER_GND_NET")].get()); + ci->addInput(id_XXX_VCC); + ci->connectPort(id_XXX_VCC, ctx->nets[ctx->id("$PACKER_VCC_NET")].get()); + } } break; default: break; diff --git a/ice40/arch.cc b/ice40/arch.cc index b36c82d5..6746b302 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -923,9 +923,20 @@ std::vector<GraphicElement> Arch::getDecalGraphics(DecalId decal) const bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayQuad &delay) const { - if (cell->type == id_ICESTORM_LC && cell->lcInfo.dffEnable) { - if (toPort == id_O) - return false; + if (cell->type == id_ICESTORM_LC) { + if (toPort == id_O) { + if (cell->lcInfo.dffEnable) + return false; + // "false paths" + if (fromPort == id_I0 && ((cell->lcInfo.lutInputMask & 0x1U) == 0)) + return false; + if (fromPort == id_I1 && ((cell->lcInfo.lutInputMask & 0x2U) == 0)) + return false; + if (fromPort == id_I2 && ((cell->lcInfo.lutInputMask & 0x4U) == 0)) + return false; + if (fromPort == id_I3 && ((cell->lcInfo.lutInputMask & 0x8U) == 0)) + return false; + } } else if (cell->type == id_ICESTORM_RAM || cell->type == id_ICESTORM_SPRAM) { return false; } @@ -1231,6 +1242,18 @@ void Arch::assignCellInfo(CellInfo *cell) cell->lcInfo.inputCount++; if (cell->getPort(id_I3)) cell->lcInfo.inputCount++; + // Find don't care LUT inputs to mask for timing analysis + cell->lcInfo.lutInputMask = 0x0; + unsigned init = int_or_default(cell->params, id_LUT_INIT); + for (unsigned k = 0; k < 4; k++) { + for (unsigned i = 0; i < 16; i++) { + // If toggling the LUT input makes a difference it's not a don't care + if (((init >> i) & 0x1U) != ((init >> (i ^ (1U << k))) & 0x1U)) { + cell->lcInfo.lutInputMask |= (1U << k); + break; + } + } + } } else if (cell->type == id_SB_IO) { cell->ioInfo.lvds = str_or_default(cell->params, id_IO_STANDARD, "SB_LVCMOS") == "SB_LVDS_INPUT"; cell->ioInfo.global = bool_or_default(cell->attrs, id_GLOBAL); diff --git a/ice40/archdefs.h b/ice40/archdefs.h index 
3d8ea282..07b209f1 100644 --- a/ice40/archdefs.h +++ b/ice40/archdefs.h @@ -137,6 +137,7 @@ struct ArchCellInfo : BaseClusterInfo bool carryEnable; bool negClk; int inputCount; + unsigned lutInputMask; const NetInfo *clk, *cen, *sr; } lcInfo; struct diff --git a/ice40/pack.cc b/ice40/pack.cc index 2b5def46..92297e8e 100644 --- a/ice40/pack.cc +++ b/ice40/pack.cc @@ -266,6 +266,68 @@ static void pack_carries(Context *ctx) log_info(" %4d LCs used as CARRY only\n", carry_only); } +static void merge_carry_luts(Context *ctx) +{ + // Find carrys + log_info("Packing indirect carry+LUT pairs...\n"); + // Find cases where a less-than-LUT2 is driving a carry and pack them together + // +----+ +-----+ | + // A--|LUT2|----|CARRY| | + // B--| | C-| |-+ + // +----+ +-| | + // | +-----+ + // | + pool<IdString> packed_cells; + auto rewrite_init = [](unsigned lut_init) { + // I0 -> LUT I2 + // I1, I2 -> carry; don't care + // I3 -> LUT I3 + unsigned result = 0; + for (unsigned i = 0; i < 16; i++) { + unsigned j = 0; + if ((i & 1)) + j |= 4; + if ((i & 8)) + j |= 8; + if (lut_init & (1 << j)) + result |= (1 << i); + } + return result; + }; + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->type != id_ICESTORM_LC || !bool_or_default(ci->params, id_CARRY_ENABLE)) + continue; // not a carry LC + if (ci->getPort(id_O)) + continue; // LUT output is already used + for (auto port : {id_I1, id_I2}) { // check carry inputs + NetInfo *i = ci->getPort(port); + if (!i) + continue; + CellInfo *drv = i->driver.cell; + if (i->driver.port != id_O) + continue; + if (!drv || drv->type != id_ICESTORM_LC || packed_cells.count(drv->name) || + bool_or_default(drv->params, id_CARRY_ENABLE) || bool_or_default(drv->params, id_DFF_ENABLE)) + continue; // not driven by a LUT, or driver already swallowed + // Check cardinality - must be LUT2 or less, noting top inputs used first + if (drv->getPort(id_I0) || drv->getPort(id_I1)) + continue; + // Pack into carry + 
drv->movePortTo(id_I2, ci, id_I0); + drv->movePortTo(id_I3, ci, id_I3); + drv->movePortTo(id_O, ci, id_O); + ci->params[id_LUT_INIT] = Property(rewrite_init(int_or_default(drv->params, id_LUT_INIT)), 16); + packed_cells.insert(drv->name); + break; + } + } + for (auto pcell : packed_cells) { + ctx->cells.erase(pcell); + } + log_info(" %4d LUTs merged into carry LCs\n", int(packed_cells.size())); +} + // "Pack" RAMs static void pack_ram(Context *ctx) { @@ -962,31 +1024,56 @@ static void place_plls(Context *ctx) // Find a BEL for it BelId found_bel; + std::string conflict_str = ""; for (auto bel_pll : pll_all_bels) { - if (pll_used_bels.count(bel_pll.first)) + if (pll_used_bels.count(bel_pll.first)) { + conflict_str += + stringf(" PLL bel '%s' is already used by '%s'.\n", ctx->nameOfBel(bel_pll.first), + pll_used_bels.at(bel_pll.first)->name.c_str(ctx)); continue; + } BelPin pll_io_a, pll_io_b; BelId gb_a, gb_b; std::tie(pll_io_a, gb_a, pll_io_b, gb_b) = bel_pll.second; if (bel2io.count(pll_io_a.bel)) { if (pll_io_a.bel == pad_bel) could_be_pad = !bel2io.count(pll_io_b.bel) || !is_sb_pll40_dual(ctx, ci); + auto conflict_pin = ctx->get_bel_package_pin(pll_io_a.bel); + conflict_str += + stringf(" PLL bel '%s' cannot be used as it conflicts with input '%s' on pin '%s'.\n", + ctx->nameOfBel(bel_pll.first), bel2io.at(pll_io_a.bel)->name.c_str(ctx), + conflict_pin.c_str()); continue; } - if (bel2io.count(pll_io_b.bel) && is_sb_pll40_dual(ctx, ci)) + if (bel2io.count(pll_io_b.bel) && is_sb_pll40_dual(ctx, ci)) { + auto conflict_pin = ctx->get_bel_package_pin(pll_io_b.bel); + conflict_str += + stringf(" PLL bel '%s' cannot be used as it conflicts with input '%s' on pin '%s'.\n", + ctx->nameOfBel(bel_pll.first), bel2io.at(pll_io_b.bel)->name.c_str(ctx), + conflict_pin.c_str()); continue; - if (gb_a_used && bel2gb.count(gb_a)) + } + if (gb_a_used && bel2gb.count(gb_a)) { + conflict_str += stringf( + " PLL bel '%s' cannot be used as it conflicts with global buffer '%s' at 
'%s'.\n", + ctx->nameOfBel(bel_pll.first), bel2gb.at(gb_a)->name.c_str(ctx), ctx->nameOfBel(gb_a)); continue; - if (gb_b_used && bel2gb.count(gb_b)) + } + if (gb_b_used && bel2gb.count(gb_b)) { + conflict_str += stringf( + " PLL bel '%s' cannot be used as it conflicts with global buffer '%s' at '%s'.\n", + ctx->nameOfBel(bel_pll.first), bel2gb.at(gb_b)->name.c_str(ctx), ctx->nameOfBel(gb_b)); continue; + } found_bel = bel_pll.first; break; } // Apply constrain & Inform user of result - if (found_bel == BelId()) - log_error("PLL '%s' couldn't be placed anywhere, no suitable BEL found.%s\n", ci->name.c_str(ctx), - could_be_pad ? " Did you mean to use a PAD PLL ?" : ""); + if (found_bel == BelId()) { + log_error("PLL '%s' couldn't be placed anywhere, no suitable BEL found.%s\n%s\n", ci->name.c_str(ctx), + could_be_pad ? " Did you mean to use a PAD PLL ?" : "", conflict_str.c_str()); + } log_info(" constrained PLL '%s' to %s\n", ci->name.c_str(ctx), ctx->nameOfBel(found_bel)); if (could_be_pad) @@ -1617,6 +1704,7 @@ bool Arch::pack() pack_lut_lutffs(ctx); pack_nonlut_ffs(ctx); pack_carries(ctx); + merge_carry_luts(ctx); pack_ram(ctx); place_plls(ctx); pack_special(ctx); diff --git a/mistral/bitstream.cc b/mistral/bitstream.cc index c5105008..3e1b8b66 100644 --- a/mistral/bitstream.cc +++ b/mistral/bitstream.cc @@ -114,7 +114,7 @@ struct MistralBitgen { (void)ci; // currently unused auto pos = CycloneV::xy2pos(x, y); - cv->bmux_r_set(CycloneV::CMUXHG, pos, CycloneV::INPUT_SELECT, bi, 0x1b); // hardcode to general routing + cv->bmux_r_set(CycloneV::CMUXHG, pos, CycloneV::INPUT_SEL, bi, 0x1b); // hardcode to general routing cv->bmux_m_set(CycloneV::CMUXHG, pos, CycloneV::TESTSYN_ENOUT_SELECT, bi, CycloneV::PRE_SYNENB); } @@ -210,7 +210,8 @@ struct MistralBitgen if (is_lutram) { for (int i = 0; i < 10; i++) { // Many MLAB settings apply to the whole LAB, not just the ALM - cv->bmux_m_set(block_type, pos, CycloneV::MODE, i, CycloneV::RAM); + cv->bmux_m_set(block_type, pos, 
CycloneV::TMODE, i, CycloneV::RAM); + cv->bmux_m_set(block_type, pos, CycloneV::BMODE, i, CycloneV::RAM); cv->bmux_n_set(block_type, pos, CycloneV::T_FEEDBACK_SEL, i, 1); } cv->bmux_r_set(block_type, pos, CycloneV::LUT_MASK, alm, 0xFFFFFFFFFFFFFFFFULL); // TODO: LUTRAM init @@ -222,11 +223,11 @@ struct MistralBitgen cv->bmux_n_set(block_type, pos, CycloneV::WRITE_PULSE_LENGTH, 0, 650); // picoseconds, presumably // TODO: understand how these enables really work cv->bmux_b_set(block_type, pos, CycloneV::EN2_EN, 0, false); - cv->bmux_b_set(block_type, pos, CycloneV::EN_SCLK_LOAD_WHAT, 0, true); - cv->bmux_m_set(block_type, pos, CycloneV::SCLR_MUX, 0, CycloneV::GIN2); + cv->bmux_b_set(block_type, pos, CycloneV::SCLR_DIS, 0, true); } else { - // Combinational mode - TODO: flop feedback - cv->bmux_m_set(block_type, pos, CycloneV::MODE, alm, alm_data.l6_mode ? CycloneV::L6 : CycloneV::L5); + // Combinational mode - TODO: flop feedback and more modes... + cv->bmux_m_set(block_type, pos, CycloneV::TMODE, alm, alm_data.l6_mode ? CycloneV::C_E : CycloneV::E_0); + cv->bmux_m_set(block_type, pos, CycloneV::BMODE, alm, alm_data.l6_mode ? 
CycloneV::D_E : CycloneV::E_1); // LUT function cv->bmux_r_set(block_type, pos, CycloneV::LUT_MASK, alm, ctx->compute_lut_mask(lab, alm)); } @@ -300,12 +301,17 @@ struct MistralBitgen // SCLR if (ff->ffInfo.ctrlset.sclr.net != nullptr) { cv->bmux_b_set(block_type, pos, CycloneV::SCLR_INV, 0, ff->ffInfo.ctrlset.sclr.inverted); + cv->bmux_b_set(block_type, pos, CycloneV::SCLR_DIS, 0, false); } else { cv->bmux_b_set(block_type, pos, sclr_dis[i / 2], alm, true); } // SLOAD if (ff->ffInfo.ctrlset.sload.net != nullptr) { cv->bmux_b_set(block_type, pos, sload_en[i / 2], alm, true); + if (ff->ffInfo.ctrlset.sload.net->name == ctx->id("$PACKER_GND_NET")) { + // force-disabled LOAD (see workaround in assign_ff_info) + cv->bmux_b_set(block_type, pos, CycloneV::SLOAD_EN, 0, false); + } cv->bmux_b_set(block_type, pos, CycloneV::SLOAD_INV, 0, ff->ffInfo.ctrlset.sload.inverted); } } @@ -340,18 +346,18 @@ struct MistralBitgen const std::array<CycloneV::bmux_type_t, 2> aclr_inp{CycloneV::ACLR0_SEL, CycloneV::ACLR1_SEL}; for (int i = 0; i < 2; i++) { - // Quartus seems to set unused ACLRs to CLKI2... - if (!lab_data.aclr_used[i]) - cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, CycloneV::CLKI2); - else - cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, (i == 1) ? CycloneV::GIN0 : CycloneV::GIN1); + // Quartus seems to set unused ACLRs to ACLR0 + if (lab_data.aclr_used[i]) + cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, (i == 1) ? CycloneV::DIN2 : CycloneV::DIN3); + else if (i == 0) + cv->bmux_m_set(block_type, pos, aclr_inp[i], 0, CycloneV::ACLR0); } for (int i = 0; i < 3; i++) { // Check for fabric->clock routing if (ctx->wires_connected( ctx->get_port(block_type, CycloneV::pos2x(pos), CycloneV::pos2y(pos), -1, CycloneV::DATAIN, 0), lab_data.clk_wires[i])) - cv->bmux_m_set(block_type, pos, CycloneV::CLKA_SEL, 0, CycloneV::GIN2); + cv->bmux_m_set(block_type, pos, CycloneV::CLKA_SEL, 0, CycloneV::DIN0); } } |