author    gatecat <gatecat@ds0.me>    2022-04-08 13:42:54 +0100
committer gatecat <gatecat@ds0.me>    2022-04-08 13:42:54 +0100
commit    49f178ed94b5fad00d25dbd12adea0bf4732f803 (patch)
tree      ea642e20bc07441a800944390e1f904e6ce5b113 /common/place
parent    e42e22575f20b59634f88b5cf694efdb413ff0a0 (diff)
Split up common into kernel,place,route
Signed-off-by: gatecat <gatecat@ds0.me>
Diffstat (limited to 'common/place')
-rw-r--r--  common/place/fast_bels.h          188
-rw-r--r--  common/place/parallel_refine.cc   959
-rw-r--r--  common/place/parallel_refine.h     43
-rw-r--r--  common/place/place_common.cc      487
-rw-r--r--  common/place/place_common.h        55
-rw-r--r--  common/place/placer1.cc          1317
-rw-r--r--  common/place/placer1.h             45
-rw-r--r--  common/place/placer_heap.cc      1830
-rw-r--r--  common/place/placer_heap.h         58
-rw-r--r--  common/place/timing_opt.cc        561
-rw-r--r--  common/place/timing_opt.h          37
11 files changed, 5580 insertions, 0 deletions
diff --git a/common/place/fast_bels.h b/common/place/fast_bels.h
new file mode 100644
index 00000000..ba9938c6
--- /dev/null
+++ b/common/place/fast_bels.h
@@ -0,0 +1,188 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 Claire Xenia Wolf <claire@yosyshq.com>
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#pragma once
+
+#include <cstddef>
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+// FastBels is a helper class that provides a fast lookup for finding BELs
+// that support a given cell type.
+struct FastBels
+{
+ struct TypeData
+ {
+ size_t type_index;
+ int number_of_possible_bels;
+ };
+
+ FastBels(Context *ctx, bool check_bel_available, int minBelsForGridPick)
+ : ctx(ctx), check_bel_available(check_bel_available), minBelsForGridPick(minBelsForGridPick)
+ {
+ }
+
+ void addCellType(IdString cell_type)
+ {
+ auto iter = cell_types.find(cell_type);
+ if (iter != cell_types.end()) {
+ // This cell type has already been added to the fast BEL lookup.
+ return;
+ }
+
+ size_t type_idx = cell_types.size();
+ auto &cell_type_data = cell_types[cell_type];
+ cell_type_data.type_index = type_idx;
+
+ fast_bels_by_cell_type.resize(type_idx + 1);
+ auto &bel_data = fast_bels_by_cell_type.at(type_idx);
+ NPNR_ASSERT(bel_data.get() == nullptr);
+ bel_data = std::make_unique<FastBelsData>();
+
+ for (auto bel : ctx->getBels()) {
+ if (!ctx->isValidBelForCellType(cell_type, bel)) {
+ continue;
+ }
+
+ cell_type_data.number_of_possible_bels += 1;
+ }
+
+ for (auto bel : ctx->getBels()) {
+ if (check_bel_available && !ctx->checkBelAvail(bel)) {
+ continue;
+ }
+
+ if (!ctx->isValidBelForCellType(cell_type, bel)) {
+ continue;
+ }
+
+ Loc loc = ctx->getBelLocation(bel);
+ if (minBelsForGridPick >= 0 && cell_type_data.number_of_possible_bels < minBelsForGridPick) {
+ loc.x = loc.y = 0;
+ }
+
+ if (int(bel_data->size()) < (loc.x + 1)) {
+ bel_data->resize(loc.x + 1);
+ }
+
+ if (int(bel_data->at(loc.x).size()) < (loc.y + 1)) {
+ bel_data->at(loc.x).resize(loc.y + 1);
+ }
+
+ bel_data->at(loc.x).at(loc.y).push_back(bel);
+ }
+ }
+
+ void addBelBucket(BelBucketId partition)
+ {
+ auto iter = partition_types.find(partition);
+ if (iter != partition_types.end()) {
+ // This partition has already been added to the fast BEL lookup.
+ return;
+ }
+
+ size_t type_idx = partition_types.size();
+ auto &type_data = partition_types[partition];
+ type_data.type_index = type_idx;
+
+ fast_bels_by_partition_type.resize(type_idx + 1);
+ auto &bel_data = fast_bels_by_partition_type.at(type_idx);
+ NPNR_ASSERT(bel_data.get() == nullptr);
+ bel_data = std::make_unique<FastBelsData>();
+
+ for (auto bel : ctx->getBels()) {
+ if (ctx->getBelBucketForBel(bel) != partition) {
+ continue;
+ }
+
+ type_data.number_of_possible_bels += 1;
+ }
+
+ for (auto bel : ctx->getBels()) {
+ if (check_bel_available && !ctx->checkBelAvail(bel)) {
+ continue;
+ }
+
+ if (ctx->getBelBucketForBel(bel) != partition) {
+ continue;
+ }
+
+ Loc loc = ctx->getBelLocation(bel);
+ if (minBelsForGridPick >= 0 && type_data.number_of_possible_bels < minBelsForGridPick) {
+ loc.x = loc.y = 0;
+ }
+
+ if (int(bel_data->size()) < (loc.x + 1)) {
+ bel_data->resize(loc.x + 1);
+ }
+
+ if (int(bel_data->at(loc.x).size()) < (loc.y + 1)) {
+ bel_data->at(loc.x).resize(loc.y + 1);
+ }
+
+ bel_data->at(loc.x).at(loc.y).push_back(bel);
+ }
+ }
+
+ typedef std::vector<std::vector<std::vector<BelId>>> FastBelsData;
+
+ int getBelsForCellType(IdString cell_type, FastBelsData **data)
+ {
+ auto iter = cell_types.find(cell_type);
+ if (iter == cell_types.end()) {
+ addCellType(cell_type);
+ iter = cell_types.find(cell_type);
+ NPNR_ASSERT(iter != cell_types.end());
+ }
+
+ auto cell_type_data = iter->second;
+
+ *data = fast_bels_by_cell_type.at(cell_type_data.type_index).get();
+ return cell_type_data.number_of_possible_bels;
+ }
+
+ size_t getBelsForBelBucket(BelBucketId partition, FastBelsData **data)
+ {
+ auto iter = partition_types.find(partition);
+ if (iter == partition_types.end()) {
+ addBelBucket(partition);
+ iter = partition_types.find(partition);
+ NPNR_ASSERT(iter != partition_types.end());
+ }
+
+ auto type_data = iter->second;
+
+ *data = fast_bels_by_partition_type.at(type_data.type_index).get();
+ return type_data.number_of_possible_bels;
+ }
+
+ Context *ctx;
+ const bool check_bel_available;
+ const int minBelsForGridPick;
+
+ dict<IdString, TypeData> cell_types;
+ std::vector<std::unique_ptr<FastBelsData>> fast_bels_by_cell_type;
+
+ dict<BelBucketId, TypeData> partition_types;
+ std::vector<std::unique_ptr<FastBelsData>> fast_bels_by_partition_type;
+};
+
+NEXTPNR_NAMESPACE_END
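
A minimal usage sketch of FastBels (illustrative only; assumes a valid Context *ctx, and the cell-type name and the x/y coordinates are placeholders): candidate BELs are registered per cell type, then looked up by grid location.

    FastBels fast_bels(ctx, /*check_bel_available=*/false, /*minBelsForGridPick=*/64);
    IdString cell_type = ctx->id("EXAMPLE_CELL_TYPE"); // hypothetical cell type name
    fast_bels.addCellType(cell_type);

    FastBels::FastBelsData *by_loc = nullptr;
    int candidate_count = fast_bels.getBelsForCellType(cell_type, &by_loc);
    int x = 0, y = 0; // placeholder grid location
    if (candidate_count > 0 && x < int(by_loc->size()) && y < int(by_loc->at(x).size())) {
        for (BelId bel : by_loc->at(x).at(y)) {
            // every 'bel' here can implement 'cell_type' and sits at grid location (x, y)
            log_info("candidate: %s\n", ctx->nameOfBel(bel));
        }
    }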
diff --git a/common/place/parallel_refine.cc b/common/place/parallel_refine.cc
new file mode 100644
index 00000000..a868ca58
--- /dev/null
+++ b/common/place/parallel_refine.cc
@@ -0,0 +1,959 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "parallel_refine.h"
+#include "log.h"
+
+#if !defined(__wasm)
+
+#include "fast_bels.h"
+#include "scope_lock.h"
+#include "timing.h"
+
+#include <chrono>
+#include <mutex>
+#include <queue>
+#include <shared_mutex>
+#include <thread>
+
+NEXTPNR_NAMESPACE_BEGIN
+
+namespace {
+struct Partition
+{
+ int x0, y0, x1, y1;
+ std::vector<CellInfo *> cells;
+ Partition() = default;
+ explicit Partition(Context *ctx)
+ {
+ x0 = ctx->getGridDimX();
+ y0 = ctx->getGridDimY();
+ x1 = 0;
+ y1 = 0;
+ for (auto &cell : ctx->cells) {
+ Loc l = ctx->getBelLocation(cell.second->bel);
+ x0 = std::min(x0, l.x);
+ x1 = std::max(x1, l.x);
+ y0 = std::min(y0, l.y);
+ y1 = std::max(y1, l.y);
+ cells.push_back(cell.second.get());
+ }
+ }
+ void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r)
+ {
+ std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) {
+ Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel);
+ return yaxis ? (l0.y < l1.y) : (l0.x < l1.x);
+ });
+ size_t pivot_point = size_t(cells.size() * pivot);
+ l.cells.clear();
+ r.cells.clear();
+ l.cells.reserve(pivot_point);
+ r.cells.reserve(cells.size() - pivot_point);
+ int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1)
+ : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y
+ : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x);
+ for (size_t i = 0; i < cells.size(); i++) {
+ Loc loc = ctx->getBelLocation(cells.at(i)->bel);
+ ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i));
+ }
+ if (yaxis) {
+ l.x0 = r.x0 = x0;
+ l.x1 = r.x1 = x1;
+ l.y0 = y0;
+ l.y1 = pivot_coord;
+ r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1);
+ r.y1 = y1;
+ } else {
+ l.y0 = r.y0 = y0;
+ l.y1 = r.y1 = y1;
+ l.x0 = x0;
+ l.x1 = pivot_coord;
+ r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1);
+ r.x1 = x1;
+ }
+ }
+};
+
+typedef int64_t wirelen_t;
+
+struct NetBB
+{
+ // Actual bounding box
+ int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
+ // Number of cells at each extremity
+ int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
+ wirelen_t hpwl(const ParallelRefineCfg &cfg) const
+ {
+ return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
+ }
+ static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr)
+ {
+ NetBB result{};
+ if (!net->driver.cell)
+ return result;
+ auto bel_loc = [&](const CellInfo *cell) {
+ BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel;
+ return ctx->getBelLocation(bel);
+ };
+ result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1;
+ Loc drv_loc = bel_loc(net->driver.cell);
+ result.x0 = result.x1 = drv_loc.x;
+ result.y0 = result.y1 = drv_loc.y;
+ for (auto &usr : net->users) {
+ Loc l = bel_loc(usr.cell);
+ if (l.x == result.x0)
+ ++result.nx0; // on the edge
+ else if (l.x < result.x0) {
+ result.x0 = l.x; // extends the edge
+ result.nx0 = 1;
+ }
+ if (l.x == result.x1)
+ ++result.nx1; // on the edge
+ else if (l.x > result.x1) {
+ result.x1 = l.x; // extends the edge
+ result.nx1 = 1;
+ }
+ if (l.y == result.y0)
+ ++result.ny0; // on the edge
+ else if (l.y < result.y0) {
+ result.y0 = l.y; // extends the edge
+ result.ny0 = 1;
+ }
+ if (l.y == result.y1)
+ ++result.ny1; // on the edge
+ else if (l.y > result.y1) {
+ result.y1 = l.y; // extends the edge
+ result.ny1 = 1;
+ }
+ }
+ return result;
+ }
+};
+
+struct GlobalState
+{
+ explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){};
+ Context *ctx;
+ ParallelRefineCfg cfg;
+ FastBels bels;
+ std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index
+ std::vector<NetBB> last_bounds;
+ std::vector<std::vector<double>> last_tmg_costs;
+ dict<IdString, NetBB> region_bounds;
+ TimingAnalyser tmg;
+
+ std::shared_timed_mutex archapi_mutex;
+
+ double temperature = 1e-7;
+ int radius = 3;
+
+ wirelen_t total_wirelen = 0;
+ double total_timing_cost = 0;
+
+ double get_timing_cost(const NetInfo *net, store_index<PortRef> user,
+ const dict<IdString, BelId> *cell2bel = nullptr)
+ {
+ if (!net->driver.cell)
+ return 0;
+ const auto &sink = net->users.at(user);
+ IdString driver_pin, sink_pin;
+        // Pick the first pin for a prediction; assume all will be similar enough
+ for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) {
+ driver_pin = pin;
+ break;
+ }
+ for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) {
+ sink_pin = pin;
+ break;
+ }
+ float crit = tmg.get_criticality(CellPortKey(sink));
+ BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel;
+ BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel;
+ double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin));
+ return delay * std::pow(crit, cfg.crit_exp);
+ }
+
+ bool skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ if (ctx->getBelGlobalBuf(net->driver.cell->bel))
+ return true;
+ return false;
+ }
+ bool timing_skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ int cc;
+ auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc);
+ if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK)
+ return true;
+ return false;
+ }
+ // ....
+};
+
+struct ThreadState
+{
+ Context *ctx; // Nextpnr context pointer
+ GlobalState &g; // Refinement engine state
+ int idx; // Index of the thread
+ DeterministicRNG rng; // Local RNG
+ // The cell partition that the thread works on
+ Partition p;
+ // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can
+ // optimise
+ std::vector<int> thread_net_idx;
+ // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's
+ // perspective
+ std::vector<NetInfo *> thread_nets;
+ std::vector<NetBB> net_bounds;
+ std::vector<std::vector<double>> arc_tmg_cost;
+ std::vector<bool> ignored_nets, tmg_ignored_nets;
+ bool arch_state_dirty = false;
+    // Our local cell-bel map, which won't be affected by out-of-partition moves
+ dict<IdString, BelId> local_cell2bel;
+
+ // Data on an inflight move
+    dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old, new)
+ // For cluster moves only
+ std::vector<std::pair<CellInfo *, Loc>> cell_rel;
+ // For incremental wirelength and delay updates
+ wirelen_t wirelen_delta = 0;
+ double timing_delta = 0;
+
+    // Total number of moves made and accepted
+ int n_move = 0, n_accept = 0;
+
+ enum BoundChange
+ {
+ NO_CHANGE,
+ CELL_MOVED_INWARDS,
+ CELL_MOVED_OUTWARDS,
+ FULL_RECOMPUTE
+ };
+    // Wirelength-related changes are handled on a per-axis basis
+ struct AxisChanges
+ {
+ std::vector<int> bounds_changed_nets;
+ std::vector<BoundChange> already_bounds_changed;
+ };
+ std::array<AxisChanges, 2> axes;
+ std::vector<NetBB> new_net_bounds;
+
+ std::vector<std::vector<bool>> already_timing_changed;
+ std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs;
+ std::vector<double> new_timing_costs;
+
+ ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){};
+ void set_partition(const Partition &part)
+ {
+ p = part;
+ thread_nets.clear();
+ thread_net_idx.resize(g.flat_nets.size());
+ std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1);
+ // Determine the set of nets that are within the thread; and therefore we care about
+ for (auto thread_cell : part.cells) {
+ for (auto &port : thread_cell->ports) {
+ if (!port.second.net)
+ continue;
+ int global_idx = port.second.net->udata;
+ auto &thread_idx = thread_net_idx.at(global_idx);
+ // Already added to the set
+ if (thread_idx != -1)
+ continue;
+ thread_idx = thread_nets.size();
+ thread_nets.push_back(port.second.net);
+ }
+ }
+ tmg_ignored_nets.clear();
+ ignored_nets.clear();
+ for (auto tn : thread_nets) {
+ ignored_nets.push_back(g.skip_net(tn));
+ tmg_ignored_nets.push_back(g.timing_skip_net(tn));
+ }
+ // Set up the original cell-bel map for all nets inside the thread
+ local_cell2bel.clear();
+ for (NetInfo *net : thread_nets) {
+ if (net->driver.cell)
+ local_cell2bel[net->driver.cell->name] = net->driver.cell->bel;
+ for (auto &usr : net->users)
+ local_cell2bel[usr.cell->name] = usr.cell->bel;
+ }
+ }
+ void setup_initial_state()
+ {
+ // Setup initial net bounding boxes and timing costs
+ net_bounds.clear();
+ arc_tmg_cost.clear();
+ for (auto tn : thread_nets) {
+ net_bounds.push_back(g.last_bounds.at(tn->udata));
+ arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata));
+ }
+ new_net_bounds = net_bounds;
+ for (int j = 0; j < 2; j++) {
+ auto &a = axes.at(j);
+ a.already_bounds_changed.resize(net_bounds.size());
+ }
+ already_timing_changed.clear();
+ already_timing_changed.resize(net_bounds.size());
+ for (size_t i = 0; i < thread_nets.size(); i++)
+ already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity());
+ }
+ bool bounds_check(BelId bel)
+ {
+ Loc l = ctx->getBelLocation(bel);
+ if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1)
+ return false;
+ return true;
+ }
+ bool bind_move()
+ {
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+ for (auto &entry : moved_cells) {
+ ctx->unbindBel(entry.second.first);
+ }
+ bool success = true;
+ for (auto &entry : moved_cells) {
+ // Make sure targets are available before we bind them
+ if (!ctx->checkBelAvail(entry.second.second)) {
+ success = false;
+ break;
+ }
+ ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = true;
+ return success;
+ }
+ bool check_validity()
+ {
+ std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+ bool result = true;
+ for (auto e : moved_cells) {
+ if (!ctx->isBelLocationValid(e.second.first)) {
+                // Have to check the old location too, as unbinding a bel could make a placement illegal by virtue of no longer
+ // enabling dedicated routes to be used
+ result = false;
+ break;
+ }
+ if (!ctx->isBelLocationValid(e.second.second)) {
+ result = false;
+ break;
+ }
+ }
+ return result;
+ }
+ void revert_move()
+ {
+ if (arch_state_dirty) {
+ // If changes to the arch state were made, revert them by restoring original cell bindings
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+ for (auto &entry : moved_cells) {
+ BelId curr_bound = ctx->cells.at(entry.first)->bel;
+ if (curr_bound != BelId())
+ ctx->unbindBel(curr_bound);
+ }
+ for (auto &entry : moved_cells) {
+ ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = false;
+ }
+ for (auto &entry : moved_cells)
+ local_cell2bel[entry.first] = entry.second.first;
+ }
+ void commit_move()
+ {
+ arch_state_dirty = false;
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ // Commit updated net bounds
+ net_bounds.at(bc) = new_net_bounds.at(bc);
+ }
+ }
+ if (g.cfg.timing_driven) {
+ NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size());
+ for (size_t i = 0; i < timing_changed_arcs.size(); i++) {
+ auto arc = timing_changed_arcs.at(i);
+ arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i);
+ }
+ }
+ }
+ void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel)
+ {
+ Loc new_loc = ctx->getBelLocation(new_bel);
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ for (const auto &port : cell->ports) {
+ NetInfo *pn = port.second.net;
+ if (!pn)
+ continue;
+ int idx = thread_net_idx.at(pn->udata);
+ if (ignored_nets.at(idx))
+ continue;
+ NetBB &new_bounds = new_net_bounds.at(idx);
+ // For the x-axis (i=0) and y-axis (i=1)
+ for (int i = 0; i < 2; i++) {
+ auto &axis = axes.at(i);
+ // New and old on this axis
+ int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x;
+ // References to updated bounding box entries
+ auto &b0 = i ? new_bounds.y0 : new_bounds.x0;
+ auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0;
+ auto &b1 = i ? new_bounds.y1 : new_bounds.x1;
+ auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1;
+ auto &change = axis.already_bounds_changed.at(idx);
+ // Lower bound
+ if (new_pos < b0) {
+ // Further out than current lower bound
+ b0 = new_pos;
+ n0 = 1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b0 && old_pos > b0) {
+ // Moved from inside into current bound
+ ++n0;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b0 && new_pos > b0) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n0 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n0;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ // Upper bound
+ if (new_pos > b1) {
+ // Further out than current upper bound
+ b1 = new_pos;
+                    n1 = 1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b1 && old_pos < b1) {
+ // Moved onto current bound
+ ++n1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b1 && new_pos < b1) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n1 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n1;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ }
+ // Timing updates if timing driven
+ if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) {
+ if (port.second.type == PORT_OUT) {
+ int cc;
+ TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
+ if (cls != TMG_IGNORE) {
+ for (auto usr : pn->users.enumerate())
+ if (!already_timing_changed.at(idx).at(usr.index.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index));
+ already_timing_changed.at(idx).at(usr.index.idx()) = true;
+ }
+ }
+ } else {
+ auto usr = port.second.user_idx;
+ if (!already_timing_changed.at(idx).at(usr.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr));
+ already_timing_changed.at(idx).at(usr.idx()) = true;
+ }
+ }
+ }
+ }
+ }
+ void compute_total_change()
+ {
+ auto &xa = axes.at(0), &ya = axes.at(1);
+ for (auto &bc : xa.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE &&
+ ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : xa.bounds_changed_nets)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == NO_CHANGE)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
+ if (g.cfg.timing_driven) {
+ NPNR_ASSERT(new_timing_costs.empty());
+ for (auto arc : timing_changed_arcs) {
+ double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel);
+ timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx()));
+ new_timing_costs.push_back(new_cost);
+ }
+ }
+ }
+ void reset_move_state()
+ {
+ moved_cells.clear();
+ cell_rel.clear();
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ new_net_bounds.at(bc) = net_bounds.at(bc);
+ axis.already_bounds_changed[bc] = NO_CHANGE;
+ }
+ axis.bounds_changed_nets.clear();
+ }
+ for (auto &arc : timing_changed_arcs) {
+ already_timing_changed.at(arc.first).at(arc.second.idx()) = false;
+ }
+ timing_changed_arcs.clear();
+ new_timing_costs.clear();
+ wirelen_delta = 0;
+ timing_delta = 0;
+ }
+
+ bool accept_move()
+ {
+ static constexpr double epsilon = 1e-20;
+ double delta = g.cfg.lambda * (timing_delta / std::max<double>(epsilon, g.total_timing_cost)) +
+ (1.0 - g.cfg.lambda) * (double(wirelen_delta) / std::max<double>(epsilon, g.total_wirelen));
+ return delta < 0 ||
+ (g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature));
+ }
+
+ bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel)
+ {
+ if (!bounds_check(old_bel) || !bounds_check(new_bel))
+ return false;
+ if (!ctx->isValidBelForCellType(cell->type, new_bel))
+ return false;
+ NPNR_ASSERT(!moved_cells.count(cell->name));
+ moved_cells[cell->name] = std::make_pair(old_bel, new_bel);
+ local_cell2bel[cell->name] = new_bel;
+ compute_changes_for_cell(cell, old_bel, new_bel);
+ return true;
+ }
+
+ bool single_cell_swap(CellInfo *cell, BelId new_bel)
+ {
+ NPNR_ASSERT(moved_cells.empty());
+ BelId old_bel = cell->bel;
+ CellInfo *bound = ctx->getBoundBelCell(new_bel);
+ if (bound && (bound->belStrength > STRENGTH_STRONG || bound->cluster != ClusterId()))
+ return false;
+ if (!add_to_move(cell, old_bel, new_bel))
+ goto fail;
+ if (bound && !add_to_move(bound, new_bel, old_bel))
+ goto fail;
+ compute_total_change();
+ // SA acceptance criteria
+
+ if (!accept_move()) {
+ // SA fail
+ goto fail;
+ }
+ // Check validity rules
+ if (!bind_move())
+ goto fail;
+ if (!check_validity())
+ goto fail;
+ // Accepted!
+ commit_move();
+ reset_move_state();
+ return true;
+ fail:
+ revert_move();
+ reset_move_state();
+ return false;
+ }
+
+ bool chain_swap(CellInfo *root_cell, BelId new_root_bel)
+ {
+ NPNR_ASSERT(moved_cells.empty());
+ std::queue<std::pair<ClusterId, BelId>> displaced_clusters;
+ pool<BelId> used_bels;
+ displaced_clusters.emplace(root_cell->cluster, new_root_bel);
+ while (!displaced_clusters.empty()) {
+ std::vector<std::pair<CellInfo *, BelId>> dest_bels;
+ auto cursor = displaced_clusters.front();
+ displaced_clusters.pop();
+ if (!ctx->getClusterPlacement(cursor.first, cursor.second, dest_bels))
+ goto fail;
+ for (const auto &db : dest_bels) {
+ BelId old_bel = db.first->bel;
+ if (moved_cells.count(db.first->name))
+ goto fail;
+ if (!add_to_move(db.first, old_bel, db.second))
+ goto fail;
+ if (used_bels.count(db.second))
+ goto fail;
+ used_bels.insert(db.second);
+
+ CellInfo *bound = ctx->getBoundBelCell(db.second);
+ if (bound) {
+ if (moved_cells.count(bound->name)) {
+ // Don't move a cell multiple times in the same go
+ goto fail;
+ } else if (bound->belStrength > STRENGTH_STRONG) {
+ goto fail;
+ } else if (bound->cluster != ClusterId()) {
+ // Displace the entire cluster
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ Loc bound_loc = ctx->getBelLocation(bound->bel);
+ Loc root_loc = ctx->getBelLocation(ctx->getClusterRootCell(bound->cluster)->bel);
+ BelId new_root = ctx->getBelByLocation(Loc(old_loc.x + (root_loc.x - bound_loc.x),
+ old_loc.y + (root_loc.y - bound_loc.y),
+ old_loc.z + (root_loc.z - bound_loc.z)));
+ if (new_root == BelId())
+ goto fail;
+ displaced_clusters.emplace(bound->cluster, new_root);
+ } else {
+ // Single cell swap
+ if (used_bels.count(old_bel))
+ goto fail;
+ used_bels.insert(old_bel);
+ if (!add_to_move(bound, bound->bel, old_bel))
+ goto fail;
+ }
+ } else if (!ctx->checkBelAvail(db.second)) {
+ goto fail;
+ }
+ }
+ }
+ compute_total_change();
+ // SA acceptance criteria
+
+ if (!accept_move()) {
+ // SA fail
+ goto fail;
+ }
+ // Check validity rules
+ if (!bind_move())
+ goto fail;
+ if (!check_validity())
+ goto fail;
+ // Accepted!
+ commit_move();
+ reset_move_state();
+ return true;
+ fail:
+ revert_move();
+ reset_move_state();
+ return false;
+ }
+
+ BelId random_bel_for_cell(CellInfo *cell, int force_z = -1)
+ {
+ IdString targetType = cell->type;
+ Loc curr_loc = ctx->getBelLocation(cell->bel);
+ int count = 0;
+
+ int dx = g.radius, dy = g.radius;
+ if (cell->region != nullptr && cell->region->constr_bels) {
+ dx = std::min(g.cfg.hpwl_scale_x * g.radius,
+ (g.region_bounds[cell->region->name].x1 - g.region_bounds[cell->region->name].x0) + 1);
+ dy = std::min(g.cfg.hpwl_scale_y * g.radius,
+ (g.region_bounds[cell->region->name].y1 - g.region_bounds[cell->region->name].y0) + 1);
+ // Clamp location to within bounds
+ curr_loc.x = std::max(g.region_bounds[cell->region->name].x0, curr_loc.x);
+ curr_loc.x = std::min(g.region_bounds[cell->region->name].x1, curr_loc.x);
+ curr_loc.y = std::max(g.region_bounds[cell->region->name].y0, curr_loc.y);
+ curr_loc.y = std::min(g.region_bounds[cell->region->name].y1, curr_loc.y);
+ }
+
+ FastBels::FastBelsData *bel_data;
+ auto type_cnt = g.bels.getBelsForCellType(targetType, &bel_data);
+
+ while (true) {
+ int nx = rng.rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0);
+ int ny = rng.rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0);
+ if (type_cnt < 64)
+ nx = ny = 0;
+ if (nx >= int(bel_data->size()))
+ continue;
+ if (ny >= int(bel_data->at(nx).size()))
+ continue;
+ const auto &fb = bel_data->at(nx).at(ny);
+ if (fb.size() == 0)
+ continue;
+ BelId bel = fb.at(rng.rng(int(fb.size())));
+ if (!bounds_check(bel))
+ continue;
+ if (force_z != -1) {
+ Loc loc = ctx->getBelLocation(bel);
+ if (loc.z != force_z)
+ continue;
+ }
+ if (!cell->testRegion(bel))
+ continue;
+ count++;
+ return bel;
+ }
+ }
+
+ void run_iter()
+ {
+ setup_initial_state();
+ n_accept = 0;
+ n_move = 0;
+ for (int m = 0; m < g.cfg.inner_iters; m++) {
+ for (auto cell : p.cells) {
+ if (cell->belStrength > STRENGTH_STRONG)
+ continue;
+ if (cell->cluster != ClusterId()) {
+ if (cell != ctx->getClusterRootCell(cell->cluster))
+ continue; // only move cluster root
+ Loc old_loc = ctx->getBelLocation(cell->bel);
+ BelId new_root = random_bel_for_cell(cell, old_loc.z);
+ if (new_root == BelId() || new_root == cell->bel)
+ continue;
+ ++n_move;
+ if (chain_swap(cell, new_root))
+ ++n_accept;
+ } else {
+ BelId new_bel = random_bel_for_cell(cell);
+ if (new_bel == BelId() || new_bel == cell->bel)
+ continue;
+ ++n_move;
+ if (single_cell_swap(cell, new_bel))
+ ++n_accept;
+ }
+ }
+ }
+ }
+};
+
+struct ParallelRefine
+{
+ Context *ctx;
+ GlobalState g;
+ std::vector<ThreadState> t;
+ ParallelRefine(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), g(ctx, cfg)
+ {
+ g.flat_nets.reserve(ctx->nets.size());
+ for (auto &net : ctx->nets) {
+ net.second->udata = g.flat_nets.size();
+ g.flat_nets.push_back(net.second.get());
+ }
+ // Setup per thread context
+ for (int i = 0; i < cfg.threads; i++) {
+ t.emplace_back(ctx, g, i);
+ }
+ // Setup region bounds
+ for (auto &region : ctx->region) {
+ Region *r = region.second.get();
+ NetBB bb;
+ if (r->constr_bels) {
+ bb.x0 = std::numeric_limits<int>::max();
+ bb.x1 = std::numeric_limits<int>::min();
+ bb.y0 = std::numeric_limits<int>::max();
+ bb.y1 = std::numeric_limits<int>::min();
+ for (auto bel : r->bels) {
+ Loc loc = ctx->getBelLocation(bel);
+ bb.x0 = std::min(bb.x0, loc.x);
+ bb.x1 = std::max(bb.x1, loc.x);
+ bb.y0 = std::min(bb.y0, loc.y);
+ bb.y1 = std::max(bb.y1, loc.y);
+ }
+ } else {
+ bb.x0 = 0;
+ bb.y0 = 0;
+ bb.x1 = ctx->getGridDimX();
+ bb.y1 = ctx->getGridDimY();
+ }
+ g.region_bounds[r->name] = bb;
+ }
+ // Setup fast bels map
+ pool<IdString> cell_types_in_use;
+ for (auto &cell : ctx->cells) {
+ IdString cell_type = cell.second->type;
+ cell_types_in_use.insert(cell_type);
+ }
+
+ for (auto cell_type : cell_types_in_use) {
+ g.bels.addCellType(cell_type);
+ }
+ };
+ std::vector<Partition> parts;
+ void do_partition()
+ {
+ parts.clear();
+ parts.emplace_back(ctx);
+ bool yaxis = false;
+ while (parts.size() < t.size()) {
+ std::vector<Partition> next(parts.size() * 2);
+ for (size_t i = 0; i < parts.size(); i++) {
+ // Randomly permute pivot every iteration so we get different thread boundaries
+ const float delta = 0.1;
+ float pivot = (0.5 - (delta / 2)) + (delta / 2) * (ctx->rng(10000) / 10000.0f);
+ parts.at(i).split(ctx, yaxis, pivot, next.at(i * 2), next.at(i * 2 + 1));
+ }
+ std::swap(parts, next);
+ yaxis = !yaxis;
+ }
+
+ NPNR_ASSERT(parts.size() == t.size());
+ // TODO: thread pool to make this worthwhile...
+ std::vector<std::thread> workers;
+ for (size_t i = 0; i < t.size(); i++) {
+ workers.emplace_back([i, this]() { t.at(i).set_partition(parts.at(i)); });
+ }
+ for (auto &w : workers)
+ w.join();
+ }
+
+ void update_global_costs()
+ {
+ g.last_bounds.resize(g.flat_nets.size());
+ g.last_tmg_costs.resize(g.flat_nets.size());
+ g.total_wirelen = 0;
+ g.total_timing_cost = 0;
+ for (size_t i = 0; i < g.flat_nets.size(); i++) {
+ NetInfo *ni = g.flat_nets.at(i);
+ if (g.skip_net(ni))
+ continue;
+ g.last_bounds.at(i) = NetBB::compute(ctx, ni);
+ g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg);
+ if (!g.timing_skip_net(ni)) {
+ auto &tc = g.last_tmg_costs.at(i);
+ tc.resize(ni->users.capacity());
+ for (auto usr : ni->users.enumerate()) {
+ tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index);
+ g.total_timing_cost += tc.at(usr.index.idx());
+ }
+ }
+ }
+ }
+ void run()
+ {
+
+ ScopeLock<Context> lock(ctx);
+ auto refine_start = std::chrono::high_resolution_clock::now();
+
+ g.tmg.setup_only = true;
+ g.tmg.setup();
+ do_partition();
+ log_info("Running parallel refinement with %d threads.\n", int(t.size()));
+ int iter = 1;
+ bool done = false;
+ update_global_costs();
+ double avg_wirelen = g.total_wirelen;
+ wirelen_t min_wirelen = g.total_wirelen;
+ while (true) {
+ if (iter > 1) {
+ if (g.total_wirelen >= min_wirelen) {
+ done = true;
+ } else if (g.total_wirelen < min_wirelen) {
+ min_wirelen = g.total_wirelen;
+ }
+ int n_accept = 0, n_move = 0;
+ for (auto &t_data : t) {
+ n_accept += t_data.n_accept;
+ n_move += t_data.n_move;
+ }
+ double r_accept = n_accept / double(n_move);
+ if (g.total_wirelen < (0.95 * avg_wirelen) && g.total_wirelen > 0) {
+ avg_wirelen = 0.8 * avg_wirelen + 0.2 * g.total_wirelen;
+ } else {
+ if (r_accept > 0.15 && g.radius > 1) {
+ g.temperature *= 0.95;
+ } else {
+ g.temperature *= 0.8;
+ }
+ }
+ if ((iter % 10) == 0 && g.radius > 1)
+ --g.radius;
+ }
+
+ if ((iter == 1) || ((iter % 5) == 0) || done)
+ log_info(" at iteration #%d: temp = %f, timing cost = "
+ "%.0f, wirelen = %.0f\n",
+ iter, g.temperature, double(g.total_timing_cost), double(g.total_wirelen));
+
+ if (done)
+ break;
+
+ do_partition();
+
+ std::vector<std::thread> workers;
+ workers.reserve(t.size());
+ for (int j = 0; j < int(t.size()); j++)
+ workers.emplace_back([this, j]() { t.at(j).run_iter(); });
+ for (auto &w : workers)
+ w.join();
+ g.tmg.run();
+ update_global_costs();
+ iter++;
+ ctx->yield();
+ }
+ auto refine_end = std::chrono::high_resolution_clock::now();
+ log_info("Placement refine time %.02fs\n", std::chrono::duration<float>(refine_end - refine_start).count());
+ }
+};
+} // namespace
+
+ParallelRefineCfg::ParallelRefineCfg(Context *ctx)
+{
+ timing_driven = ctx->setting<bool>("timing_driven");
+ threads = ctx->setting<int>("threads", 8);
+    // snap the thread count down to a power of two, keeping at least min_thread_size cells per thread
+ int actual_threads = 1;
+ while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size)
+ actual_threads *= 2;
+ threads = actual_threads;
+ hpwl_scale_x = 1;
+ hpwl_scale_y = 1;
+}
+
+bool parallel_refine(Context *ctx, ParallelRefineCfg cfg)
+{
+ // TODO
+ ParallelRefine refine(ctx, cfg);
+ refine.run();
+ timing_analysis(ctx);
+ return true;
+}
+
+NEXTPNR_NAMESPACE_END
+
+#else /* !defined(__wasm) */
+
+NEXTPNR_NAMESPACE_BEGIN
+
+bool parallel_refine(Context *ctx, ParallelRefineCfg cfg) { log_abort(); }
+
+NEXTPNR_NAMESPACE_END
+
+#endif
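
The acceptance test in ThreadState::accept_move() above combines the timing and wirelength deltas, each normalised by its design-wide total, into a single cost delta weighted by lambda. A distilled, standalone restatement of that rule (illustrative; the engine itself draws its random value from the per-thread RNG):

    #include <algorithm>
    #include <cmath>

    // Accept improving moves unconditionally; accept worsening moves with
    // probability exp(-delta / temperature), as in classic simulated annealing.
    bool sa_accept(double timing_delta, double wirelen_delta, double total_timing,
                   double total_wirelen, double lambda, double temperature, double rand01)
    {
        const double eps = 1e-20;
        double delta = lambda * (timing_delta / std::max(eps, total_timing)) +
                       (1.0 - lambda) * (wirelen_delta / std::max(eps, total_wirelen));
        return delta < 0 || (temperature > 1e-8 && rand01 <= std::exp(-delta / temperature));
    }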
diff --git a/common/place/parallel_refine.h b/common/place/parallel_refine.h
new file mode 100644
index 00000000..556317cd
--- /dev/null
+++ b/common/place/parallel_refine.h
@@ -0,0 +1,43 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef PARALLEL_REFINE_H
+#define PARALLEL_REFINE_H
+
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct ParallelRefineCfg
+{
+ ParallelRefineCfg(Context *ctx);
+ bool timing_driven;
+ int threads;
+ int hpwl_scale_x, hpwl_scale_y;
+ double lambda = 0.5f;
+ float crit_exp = 8;
+ int inner_iters = 15;
+ int min_thread_size = 500;
+};
+
+bool parallel_refine(Context *ctx, ParallelRefineCfg cfg);
+
+NEXTPNR_NAMESPACE_END
+
+#endif
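
A minimal sketch of calling the refiner declared above from a flow, assuming the design already holds a legal placement and a valid Context *ctx; the configuration is derived from context settings, and individual fields may be overridden before the call:

    ParallelRefineCfg cfg(ctx);
    cfg.inner_iters = 15; // matches the default above; shown only as an example override
    if (!parallel_refine(ctx, cfg))
        log_error("parallel refinement failed\n");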
diff --git a/common/place/place_common.cc b/common/place/place_common.cc
new file mode 100644
index 00000000..e03fca55
--- /dev/null
+++ b/common/place/place_common.cc
@@ -0,0 +1,487 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "place_common.h"
+#include <cmath>
+#include "log.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+// Get the total estimated wirelength for a net
+wirelen_t get_net_metric(const Context *ctx, const NetInfo *net, MetricType type, float &tns)
+{
+ wirelen_t wirelength = 0;
+ CellInfo *driver_cell = net->driver.cell;
+ if (!driver_cell)
+ return 0;
+ if (driver_cell->bel == BelId())
+ return 0;
+ bool driver_gb = ctx->getBelGlobalBuf(driver_cell->bel);
+ if (driver_gb)
+ return 0;
+ int clock_count;
+ bool timing_driven = ctx->setting<bool>("timing_driven") && type == MetricType::COST &&
+ ctx->getPortTimingClass(driver_cell, net->driver.port, clock_count) != TMG_IGNORE;
+ delay_t negative_slack = 0;
+ delay_t worst_slack = std::numeric_limits<delay_t>::max();
+ Loc driver_loc = ctx->getBelLocation(driver_cell->bel);
+ int xmin = driver_loc.x, xmax = driver_loc.x, ymin = driver_loc.y, ymax = driver_loc.y;
+ for (auto load : net->users) {
+ if (load.cell == nullptr)
+ continue;
+ CellInfo *load_cell = load.cell;
+ if (load_cell->bel == BelId())
+ continue;
+ if (timing_driven) {
+ delay_t net_delay = ctx->predictArcDelay(net, load);
+ auto slack = load.budget - net_delay;
+ if (slack < 0)
+ negative_slack += slack;
+ worst_slack = std::min(slack, worst_slack);
+ }
+
+ if (ctx->getBelGlobalBuf(load_cell->bel))
+ continue;
+ Loc load_loc = ctx->getBelLocation(load_cell->bel);
+
+ xmin = std::min(xmin, load_loc.x);
+ ymin = std::min(ymin, load_loc.y);
+ xmax = std::max(xmax, load_loc.x);
+ ymax = std::max(ymax, load_loc.y);
+ }
+ if (timing_driven) {
+ wirelength = wirelen_t(
+ (((ymax - ymin) + (xmax - xmin)) * std::min(5.0, (1.0 + std::exp(-ctx->getDelayNS(worst_slack) / 5)))));
+ } else {
+ wirelength = wirelen_t((ymax - ymin) + (xmax - xmin));
+ }
+
+ tns += ctx->getDelayNS(negative_slack);
+ return wirelength;
+}
+
+// Get the total wirelength for a cell
+wirelen_t get_cell_metric(const Context *ctx, const CellInfo *cell, MetricType type)
+{
+ std::set<IdString> nets;
+ for (auto p : cell->ports) {
+ if (p.second.net)
+ nets.insert(p.second.net->name);
+ }
+ wirelen_t wirelength = 0;
+ float tns = 0;
+ for (auto n : nets) {
+ wirelength += get_net_metric(ctx, ctx->nets.at(n).get(), type, tns);
+ }
+ return wirelength;
+}
+
+wirelen_t get_cell_metric_at_bel(const Context *ctx, CellInfo *cell, BelId bel, MetricType type)
+{
+ BelId oldBel = cell->bel;
+ cell->bel = bel;
+ wirelen_t wirelen = get_cell_metric(ctx, cell, type);
+ cell->bel = oldBel;
+ return wirelen;
+}
+
+// Placing a single cell
+bool place_single_cell(Context *ctx, CellInfo *cell, bool require_legality)
+{
+ bool all_placed = false;
+ int iters = 25;
+ while (!all_placed) {
+ BelId best_bel = BelId();
+ wirelen_t best_wirelen = std::numeric_limits<wirelen_t>::max(),
+ best_ripup_wirelen = std::numeric_limits<wirelen_t>::max();
+ CellInfo *ripup_target = nullptr;
+ BelId ripup_bel = BelId();
+ if (cell->bel != BelId()) {
+ ctx->unbindBel(cell->bel);
+ }
+ IdString targetType = cell->type;
+ for (auto bel : ctx->getBels()) {
+ if (ctx->isValidBelForCellType(targetType, bel)) {
+ if (ctx->checkBelAvail(bel)) {
+ wirelen_t wirelen = get_cell_metric_at_bel(ctx, cell, bel, MetricType::COST);
+ if (iters >= 4)
+ wirelen += ctx->rng(25);
+ if (wirelen <= best_wirelen) {
+ best_wirelen = wirelen;
+ best_bel = bel;
+ }
+ } else {
+ wirelen_t wirelen = get_cell_metric_at_bel(ctx, cell, bel, MetricType::COST);
+ if (iters >= 4)
+ wirelen += ctx->rng(25);
+ if (wirelen <= best_ripup_wirelen) {
+ CellInfo *curr_cell = ctx->getBoundBelCell(bel);
+ if (curr_cell->belStrength < STRENGTH_STRONG) {
+ best_ripup_wirelen = wirelen;
+ ripup_bel = bel;
+ ripup_target = curr_cell;
+ }
+ }
+ }
+ }
+ }
+ if (best_bel == BelId()) {
+ if (iters == 0) {
+ log_error("failed to place cell '%s' of type '%s' (ripup iteration limit exceeded)\n",
+ cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ if (ripup_bel == BelId()) {
+ log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ --iters;
+ ctx->unbindBel(ripup_target->bel);
+ best_bel = ripup_bel;
+ } else {
+ ripup_target = nullptr;
+ all_placed = true;
+ }
+ ctx->bindBel(best_bel, cell, STRENGTH_WEAK);
+ if (require_legality && !ctx->isBelLocationValid(best_bel)) {
+ ctx->unbindBel(best_bel);
+ if (ripup_target != nullptr) {
+ ctx->bindBel(best_bel, ripup_target, STRENGTH_WEAK);
+ }
+ all_placed = false;
+ continue;
+ }
+ if (ctx->verbose)
+ log_info(" placed single cell '%s' at '%s'\n", cell->name.c_str(ctx), ctx->nameOfBel(best_bel));
+ cell = ripup_target;
+ }
+ return true;
+}
+
+class ConstraintLegaliseWorker
+{
+ private:
+ Context *ctx;
+ std::set<IdString> rippedCells;
+ dict<IdString, Loc> oldLocations;
+ dict<ClusterId, std::vector<CellInfo *>> cluster2cells;
+
+ class IncreasingDiameterSearch
+ {
+ public:
+ IncreasingDiameterSearch() : start(0), min(0), max(-1){};
+ IncreasingDiameterSearch(int x) : start(x), min(x), max(x){};
+ IncreasingDiameterSearch(int start, int min, int max) : start(start), min(min), max(max){};
+ bool done() const { return (diameter > (max - min)); };
+ int get() const
+ {
+ int val = start + sign * diameter;
+ val = std::max(val, min);
+ val = std::min(val, max);
+ return val;
+ }
+
+ void next()
+ {
+ if (sign == 0) {
+ sign = 1;
+ diameter = 1;
+ } else if (sign == -1) {
+ sign = 1;
+ if ((start + sign * diameter) > max)
+ sign = -1;
+ ++diameter;
+ } else {
+ sign = -1;
+ if ((start + sign * diameter) < min) {
+ sign = 1;
+ ++diameter;
+ }
+ }
+ }
+
+ void reset()
+ {
+ sign = 0;
+ diameter = 0;
+ }
+
+ private:
+ int start, min, max;
+ int diameter = 0;
+ int sign = 0;
+ };
+
+ typedef dict<IdString, Loc> CellLocations;
+
+ // Check if a location would be suitable for a cell and all its constrained children
+ bool valid_loc_for(const CellInfo *cell, Loc loc, CellLocations &solution, pool<Loc> &usedLocations)
+ {
+ BelId locBel = ctx->getBelByLocation(loc);
+ if (locBel == BelId())
+ return false;
+
+ if (cell->cluster == ClusterId()) {
+ if (!ctx->isValidBelForCellType(cell->type, locBel))
+ return false;
+ if (!ctx->checkBelAvail(locBel)) {
+ CellInfo *confCell = ctx->getConflictingBelCell(locBel);
+ if (confCell->belStrength >= STRENGTH_STRONG) {
+ return false;
+ }
+ }
+ // Don't place at tiles where any strongly bound Bels exist, as we might need to rip them up later
+ for (auto tilebel : ctx->getBelsByTile(loc.x, loc.y)) {
+ CellInfo *tcell = ctx->getBoundBelCell(tilebel);
+ if (tcell && tcell->belStrength >= STRENGTH_STRONG)
+ return false;
+ }
+ usedLocations.insert(loc);
+ solution[cell->name] = loc;
+ } else {
+ std::vector<std::pair<CellInfo *, BelId>> placement;
+ if (!ctx->getClusterPlacement(cell->cluster, locBel, placement))
+ return false;
+ for (auto &p : placement) {
+ Loc p_loc = ctx->getBelLocation(p.second);
+ if (!ctx->checkBelAvail(p.second)) {
+ CellInfo *confCell = ctx->getConflictingBelCell(p.second);
+ if (confCell->belStrength >= STRENGTH_STRONG) {
+ return false;
+ }
+ }
+ // Don't place at tiles where any strongly bound Bels exist, as we might need to rip them up later
+ for (auto tilebel : ctx->getBelsByTile(p_loc.x, p_loc.y)) {
+ CellInfo *tcell = ctx->getBoundBelCell(tilebel);
+ if (tcell && tcell->belStrength >= STRENGTH_STRONG)
+ return false;
+ }
+ usedLocations.insert(p_loc);
+ solution[p.first->name] = p_loc;
+ }
+ }
+
+ return true;
+ }
+
+ // Set the strength to locked on all cells in chain
+ void lockdown_chain(CellInfo *root)
+ {
+ root->belStrength = STRENGTH_STRONG;
+ if (root->cluster != ClusterId())
+ for (auto child : cluster2cells.at(root->cluster))
+ child->belStrength = STRENGTH_STRONG;
+ }
+
+ // Legalise placement constraints on a cell
+ bool legalise_cell(CellInfo *cell)
+ {
+ if (cell->cluster != ClusterId() && ctx->getClusterRootCell(cell->cluster) != cell)
+ return true; // Only process chain roots
+ if (constraints_satisfied(cell)) {
+ if (cell->cluster != ClusterId())
+ lockdown_chain(cell);
+ } else {
+ IncreasingDiameterSearch xRootSearch, yRootSearch, zRootSearch;
+ Loc currentLoc;
+ if (cell->bel != BelId())
+ currentLoc = ctx->getBelLocation(cell->bel);
+ else
+ currentLoc = oldLocations[cell->name];
+ xRootSearch = IncreasingDiameterSearch(currentLoc.x, 0, ctx->getGridDimX() - 1);
+ yRootSearch = IncreasingDiameterSearch(currentLoc.y, 0, ctx->getGridDimY() - 1);
+ zRootSearch = IncreasingDiameterSearch(currentLoc.z, 0, ctx->getTileBelDimZ(currentLoc.x, currentLoc.y));
+
+ while (!xRootSearch.done()) {
+ Loc rootLoc;
+
+ rootLoc.x = xRootSearch.get();
+ rootLoc.y = yRootSearch.get();
+ rootLoc.z = zRootSearch.get();
+ zRootSearch.next();
+ if (zRootSearch.done()) {
+ zRootSearch.reset();
+ yRootSearch.next();
+ if (yRootSearch.done()) {
+ yRootSearch.reset();
+ xRootSearch.next();
+ }
+ }
+
+ CellLocations solution;
+ pool<Loc> used;
+ if (valid_loc_for(cell, rootLoc, solution, used)) {
+ for (auto cp : solution) {
+ // First unbind all cells
+ if (ctx->cells.at(cp.first)->bel != BelId())
+ ctx->unbindBel(ctx->cells.at(cp.first)->bel);
+ }
+ for (auto cp : solution) {
+ if (ctx->verbose)
+ log_info(" placing '%s' at (%d, %d, %d)\n", cp.first.c_str(ctx), cp.second.x,
+ cp.second.y, cp.second.z);
+ BelId target = ctx->getBelByLocation(cp.second);
+ if (!ctx->checkBelAvail(target)) {
+ CellInfo *confl_cell = ctx->getConflictingBelCell(target);
+ if (confl_cell != nullptr) {
+ if (ctx->verbose)
+ log_info(" '%s' already placed at '%s'\n", ctx->nameOf(confl_cell),
+ ctx->nameOfBel(confl_cell->bel));
+ NPNR_ASSERT(confl_cell->belStrength < STRENGTH_STRONG);
+ ctx->unbindBel(target);
+ rippedCells.insert(confl_cell->name);
+ }
+ }
+ ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_STRONG);
+ rippedCells.erase(cp.first);
+ }
+ for (auto cp : solution) {
+ for (auto bel : ctx->getBelsByTile(cp.second.x, cp.second.y)) {
+ CellInfo *belCell = ctx->getBoundBelCell(bel);
+ if (belCell != nullptr && !solution.count(belCell->name)) {
+ if (!ctx->isBelLocationValid(bel)) {
+ NPNR_ASSERT(belCell->belStrength < STRENGTH_STRONG);
+ ctx->unbindBel(bel);
+ rippedCells.insert(belCell->name);
+ }
+ }
+ }
+ }
+ NPNR_ASSERT(constraints_satisfied(cell));
+ return true;
+ }
+ }
+ return false;
+ }
+ return true;
+ }
+
+ // Check if constraints are currently satisfied on a cell and its children
+ bool constraints_satisfied(const CellInfo *cell) { return get_constraints_distance(ctx, cell) == 0; }
+
+ public:
+ ConstraintLegaliseWorker(Context *ctx) : ctx(ctx)
+ {
+ for (auto &cell : ctx->cells) {
+ if (cell.second->cluster != ClusterId())
+ cluster2cells[cell.second->cluster].push_back(cell.second.get());
+ }
+ };
+
+ unsigned print_stats(const char *point)
+ {
+ float distance_sum = 0;
+ float max_distance = 0;
+ unsigned moved_cells = 0;
+ unsigned unplaced_cells = 0;
+ for (auto orig : oldLocations) {
+ if (ctx->cells.at(orig.first)->bel == BelId()) {
+ unplaced_cells++;
+ continue;
+ }
+ Loc newLoc = ctx->getBelLocation(ctx->cells.at(orig.first)->bel);
+ if (newLoc != orig.second) {
+ float distance = std::sqrt(std::pow(newLoc.x - orig.second.x, 2) + pow(newLoc.y - orig.second.y, 2));
+ moved_cells++;
+ distance_sum += distance;
+ if (distance > max_distance)
+ max_distance = distance;
+ }
+ }
+ log_info(" moved %d cells, %d unplaced (after %s)\n", moved_cells, unplaced_cells, point);
+ if (moved_cells > 0) {
+ log_info(" average distance %f\n", (distance_sum / moved_cells));
+ log_info(" maximum distance %f\n", max_distance);
+ }
+ return moved_cells + unplaced_cells;
+ }
+
+ int legalise_constraints()
+ {
+ log_info("Legalising relative constraints...\n");
+ for (auto &cell : ctx->cells) {
+ oldLocations[cell.first] = ctx->getBelLocation(cell.second->bel);
+ }
+ for (auto &cell : ctx->cells) {
+ bool res = legalise_cell(cell.second.get());
+ if (!res) {
+ log_error("failed to place chain starting at cell '%s'\n", cell.first.c_str(ctx));
+ return -1;
+ }
+ }
+ if (print_stats("legalising chains") == 0)
+ return 0;
+ for (auto rippedCell : rippedCells) {
+ bool res = place_single_cell(ctx, ctx->cells.at(rippedCell).get(), true);
+ if (!res) {
+ log_error("failed to place cell '%s' after relative constraint legalisation\n", rippedCell.c_str(ctx));
+ return -1;
+ }
+ }
+ auto score = print_stats("replacing ripped up cells");
+ for (auto &cell : ctx->cells)
+ if (get_constraints_distance(ctx, cell.second.get()) != 0)
+ log_error("constraint satisfaction check failed for cell '%s' at Bel '%s'\n", cell.first.c_str(ctx),
+ ctx->nameOfBel(cell.second->bel));
+ return score;
+ }
+};
+
+bool legalise_relative_constraints(Context *ctx) { return ConstraintLegaliseWorker(ctx).legalise_constraints() > 0; }
+
+// Get the total distance from satisfied constraints for a cell
+int get_constraints_distance(const Context *ctx, const CellInfo *cell)
+{
+ int dist = 0;
+ if (cell->bel == BelId())
+ return 100000;
+ Loc loc = ctx->getBelLocation(cell->bel);
+
+ if (cell->cluster != ClusterId()) {
+ CellInfo *root = ctx->getClusterRootCell(cell->cluster);
+ if (root == cell) {
+ // parent
+ std::vector<std::pair<CellInfo *, BelId>> placement;
+ if (!ctx->getClusterPlacement(cell->cluster, cell->bel, placement)) {
+ return 100000;
+ } else {
+ for (const auto &p : placement) {
+ if (p.first->bel == BelId())
+ return 100000;
+ Loc c_loc = ctx->getBelLocation(p.first->bel);
+ Loc p_loc = ctx->getBelLocation(p.second);
+ dist += std::abs(c_loc.x - p_loc.x);
+ dist += std::abs(c_loc.y - p_loc.y);
+ dist += std::abs(c_loc.z - p_loc.z);
+ }
+ }
+ } else {
+ // child
+ if (root->bel == BelId())
+ return 100000;
+ Loc root_loc = ctx->getBelLocation(root->bel);
+ Loc offset = ctx->getClusterOffset(cell);
+ dist += std::abs((root_loc.x + offset.x) - loc.x);
+ dist += std::abs((root_loc.y + offset.y) - loc.y);
+ }
+ }
+
+ return dist;
+}
+
+NEXTPNR_NAMESPACE_END
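
A worked restatement of the timing weighting applied inside get_net_metric() above (illustrative): in cost mode the plain half-perimeter wirelength (xmax - xmin) + (ymax - ymin) is scaled by a factor between 1 and 5 that grows as the worst slack on the net becomes more negative.

    #include <algorithm>
    #include <cmath>

    // Weight applied to a net's HPWL when timing driven; worst_slack_ns is the
    // worst slack across the net's users, in nanoseconds.
    double timing_weight(double worst_slack_ns)
    {
        return std::min(5.0, 1.0 + std::exp(-worst_slack_ns / 5.0));
    }
    // e.g. 0 ns slack gives a weight of 2.0; -10 ns slack saturates at the 5.0 cap.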
diff --git a/common/place/place_common.h b/common/place/place_common.h
new file mode 100644
index 00000000..5e5cbee3
--- /dev/null
+++ b/common/place/place_common.h
@@ -0,0 +1,55 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef PLACE_COMMON_H
+#define PLACE_COMMON_H
+
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+typedef int64_t wirelen_t;
+
+enum class MetricType
+{
+ COST,
+ WIRELENGTH
+};
+
+// Return the wirelength of a net
+wirelen_t get_net_metric(const Context *ctx, const NetInfo *net, MetricType type, float &tns);
+
+// Return the wirelength of all nets connected to a cell
+wirelen_t get_cell_metric(const Context *ctx, const CellInfo *cell, MetricType type);
+
+// Return the wirelength of all nets connected to a cell, when the cell is at a given bel
+wirelen_t get_cell_metric_at_bel(const Context *ctx, CellInfo *cell, BelId bel, MetricType type);
+
+// Place a single cell in the lowest wirelength Bel available, optionally requiring validity check
+bool place_single_cell(Context *ctx, CellInfo *cell, bool require_legality);
+
+// Modify a design s.t. all relative placement constraints are satisfied
+bool legalise_relative_constraints(Context *ctx);
+
+// Get the total distance from satisfied constraints for a cell
+int get_constraints_distance(const Context *ctx, const CellInfo *cell);
+
+NEXTPNR_NAMESPACE_END
+
+#endif
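
A small sketch combining the helpers declared above to evaluate candidate BELs for one cell (illustrative; 'cell' is an assumed CellInfo* and place_common.h plus <limits> are assumed to be included; the loop mirrors the scan performed by place_single_cell()):

    BelId best;
    wirelen_t best_metric = std::numeric_limits<wirelen_t>::max();
    for (auto bel : ctx->getBels()) {
        if (!ctx->isValidBelForCellType(cell->type, bel) || !ctx->checkBelAvail(bel))
            continue;
        wirelen_t metric = get_cell_metric_at_bel(ctx, cell, bel, MetricType::WIRELENGTH);
        if (metric < best_metric) {
            best_metric = metric;
            best = bel;
        }
    }
    // 'best' now holds the lowest-wirelength available BEL, or BelId() if none was found.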
diff --git a/common/place/placer1.cc b/common/place/placer1.cc
new file mode 100644
index 00000000..a6ba3895
--- /dev/null
+++ b/common/place/placer1.cc
@@ -0,0 +1,1317 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 Claire Xenia Wolf <claire@yosyshq.com>
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Simulated annealing implementation based on arachne-pnr
+ * Copyright (C) 2015-2018 Cotton Seed
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "placer1.h"
+#include <algorithm>
+#include <boost/lexical_cast.hpp>
+#include <boost/range/adaptor/reversed.hpp>
+#include <chrono>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <list>
+#include <map>
+#include <ostream>
+#include <queue>
+#include <set>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+#include "fast_bels.h"
+#include "log.h"
+#include "place_common.h"
+#include "scope_lock.h"
+#include "timing.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+class SAPlacer
+{
+ private:
+ struct BoundingBox
+ {
+ // Actual bounding box
+ int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
+ // Number of cells at each extremity
+ int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
+ wirelen_t hpwl(const Placer1Cfg &cfg) const
+ {
+ return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
+ }
+ };
+
+ public:
+ SAPlacer(Context *ctx, Placer1Cfg cfg)
+ : ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg), tmg(ctx)
+ {
+ for (auto bel : ctx->getBels()) {
+ Loc loc = ctx->getBelLocation(bel);
+ max_x = std::max(max_x, loc.x);
+ max_y = std::max(max_y, loc.y);
+ }
+ diameter = std::max(max_x, max_y) + 1;
+
+ pool<IdString> cell_types_in_use;
+ for (auto &cell : ctx->cells) {
+ IdString cell_type = cell.second->type;
+ cell_types_in_use.insert(cell_type);
+ }
+
+ for (auto cell_type : cell_types_in_use) {
+ fast_bels.addCellType(cell_type);
+ }
+
+ net_bounds.resize(ctx->nets.size());
+ net_arc_tcost.resize(ctx->nets.size());
+ old_udata.reserve(ctx->nets.size());
+ net_by_udata.reserve(ctx->nets.size());
+ decltype(NetInfo::udata) n = 0;
+ for (auto &net : ctx->nets) {
+ old_udata.emplace_back(net.second->udata);
+ net_arc_tcost.at(n).resize(net.second->users.capacity());
+ net.second->udata = n++;
+ net_by_udata.push_back(net.second.get());
+ }
+ for (auto &region : ctx->region) {
+ Region *r = region.second.get();
+ BoundingBox bb;
+ if (r->constr_bels) {
+ bb.x0 = std::numeric_limits<int>::max();
+ bb.x1 = std::numeric_limits<int>::min();
+ bb.y0 = std::numeric_limits<int>::max();
+ bb.y1 = std::numeric_limits<int>::min();
+ for (auto bel : r->bels) {
+ Loc loc = ctx->getBelLocation(bel);
+ bb.x0 = std::min(bb.x0, loc.x);
+ bb.x1 = std::max(bb.x1, loc.x);
+ bb.y0 = std::min(bb.y0, loc.y);
+ bb.y1 = std::max(bb.y1, loc.y);
+ }
+ } else {
+ bb.x0 = 0;
+ bb.y0 = 0;
+ bb.x1 = max_x;
+ bb.y1 = max_y;
+ }
+ region_bounds[r->name] = bb;
+ }
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (ci->cluster == ClusterId())
+ continue;
+ cluster2cell[ci->cluster].push_back(ci);
+ }
+ }
+
+ ~SAPlacer()
+ {
+ for (auto &net : ctx->nets)
+ net.second->udata = old_udata[net.second->udata];
+ }
+
+ bool place(bool refine = false)
+ {
+ log_break();
+
+ ScopeLock<Context> lock(ctx);
+
+ size_t placed_cells = 0;
+ std::vector<CellInfo *> autoplaced;
+ std::vector<CellInfo *> chain_basis;
+ if (!refine) {
+ // Initial constraints placer
+ for (auto &cell_entry : ctx->cells) {
+ CellInfo *cell = cell_entry.second.get();
+ auto loc = cell->attrs.find(ctx->id("BEL"));
+ if (loc != cell->attrs.end()) {
+ std::string loc_name = loc->second.as_string();
+ BelId bel = ctx->getBelByNameStr(loc_name);
+ if (bel == BelId()) {
+ log_error("No Bel named \'%s\' located for "
+ "this chip (processing BEL attribute on \'%s\')\n",
+ loc_name.c_str(), cell->name.c_str(ctx));
+ }
+
+ if (!ctx->isValidBelForCellType(cell->type, bel)) {
+ IdString bel_type = ctx->getBelType(bel);
+ log_error("Bel \'%s\' of type \'%s\' does not match cell "
+ "\'%s\' of type \'%s\'\n",
+ loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ auto bound_cell = ctx->getBoundBelCell(bel);
+ if (bound_cell) {
+ log_error(
+ "Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n",
+ cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx));
+ }
+
+ ctx->bindBel(bel, cell, STRENGTH_USER);
+ if (!ctx->isBelLocationValid(bel)) {
+ IdString bel_type = ctx->getBelType(bel);
+ log_error("Bel \'%s\' of type \'%s\' is not valid for cell "
+ "\'%s\' of type \'%s\'\n",
+ loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ locked_bels.insert(bel);
+ placed_cells++;
+ }
+ }
+ int constr_placed_cells = placed_cells;
+ log_info("Placed %d cells based on constraints.\n", int(placed_cells));
+ ctx->yield();
+
+ // Sort to-place cells for deterministic initial placement
+
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (ci->bel == BelId()) {
+ autoplaced.push_back(cell.second.get());
+ }
+ }
+ std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; });
+ ctx->shuffle(autoplaced);
+ auto iplace_start = std::chrono::high_resolution_clock::now();
+ // Place cells randomly initially
+ log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size()));
+
+ for (auto cell : autoplaced) {
+ place_initial(cell);
+ placed_cells++;
+ if ((placed_cells - constr_placed_cells) % 500 == 0)
+ log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells),
+ int(autoplaced.size()));
+ }
+ if ((placed_cells - constr_placed_cells) % 500 != 0)
+ log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells),
+ int(autoplaced.size()));
+ if (cfg.budgetBased && cfg.slack_redist_iter > 0)
+ assign_budget(ctx);
+ ctx->yield();
+ auto iplace_end = std::chrono::high_resolution_clock::now();
+ log_info("Initial placement time %.02fs\n",
+ std::chrono::duration<float>(iplace_end - iplace_start).count());
+ log_info("Running simulated annealing placer.\n");
+ } else {
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (ci->belStrength > STRENGTH_STRONG) {
+ continue;
+ } else if (ci->cluster != ClusterId()) {
+ if (ctx->getClusterRootCell(ci->cluster) == ci)
+ chain_basis.push_back(ci);
+ else
+ continue;
+ } else {
+ autoplaced.push_back(ci);
+ }
+ }
+ require_legal = false;
+ diameter = 3;
+ log_info("Running simulated annealing placer for refinement.\n");
+ }
+ auto saplace_start = std::chrono::high_resolution_clock::now();
+
+ // Invoke timing analysis to obtain criticalities
+ tmg.setup_only = true;
+ if (!cfg.budgetBased)
+ tmg.setup();
+
+ // Calculate costs after initial placement
+ setup_costs();
+ moveChange.init(this);
+ curr_wirelen_cost = total_wirelen_cost();
+ curr_timing_cost = total_timing_cost();
+ last_wirelen_cost = curr_wirelen_cost;
+ last_timing_cost = curr_timing_cost;
+
+ if (cfg.netShareWeight > 0)
+ setup_nets_by_tile();
+
+ wirelen_t avg_wirelen = curr_wirelen_cost;
+ wirelen_t min_wirelen = curr_wirelen_cost;
+
+ int n_no_progress = 0;
+ temp = refine ? 1e-7 : cfg.startTemp;
+
+ // Main simulated annealing loop
+ for (int iter = 1;; iter++) {
+ n_move = n_accept = 0;
+ improved = false;
+
+ if (iter % 5 == 0 || iter == 1)
+ log_info(" at iteration #%d: temp = %f, timing cost = "
+ "%.0f, wirelen = %.0f\n",
+ iter, temp, double(curr_timing_cost), double(curr_wirelen_cost));
+
+ for (int m = 0; m < 15; ++m) {
+ // Loop through all automatically placed cells
+ for (auto cell : autoplaced) {
+ // Find another random Bel for this cell
+ BelId try_bel = random_bel_for_cell(cell);
+ // If valid, try and swap to a new position and see if
+ // the new position is valid/worthwhile
+ if (try_bel != BelId() && try_bel != cell->bel)
+ try_swap_position(cell, try_bel);
+ }
+ // Also try swapping chains, if applicable
+ for (auto cb : chain_basis) {
+ Loc chain_base_loc = ctx->getBelLocation(cb->bel);
+ BelId try_base = random_bel_for_cell(cb, chain_base_loc.z);
+ if (try_base != BelId() && try_base != cb->bel)
+ try_swap_chain(cb, try_base);
+ }
+ }
+
+ if (ctx->debug) {
+ // Verify correctness of incremental wirelen updates
+ for (size_t i = 0; i < net_bounds.size(); i++) {
+ auto net = net_by_udata[i];
+ if (ignore_net(net))
+ continue;
+ auto &incr = net_bounds.at(i), gold = get_net_bounds(net);
+ NPNR_ASSERT(incr.x0 == gold.x0);
+ NPNR_ASSERT(incr.x1 == gold.x1);
+ NPNR_ASSERT(incr.y0 == gold.y0);
+ NPNR_ASSERT(incr.y1 == gold.y1);
+ NPNR_ASSERT(incr.nx0 == gold.nx0);
+ NPNR_ASSERT(incr.nx1 == gold.nx1);
+ NPNR_ASSERT(incr.ny0 == gold.ny0);
+ NPNR_ASSERT(incr.ny1 == gold.ny1);
+ }
+ }
+
+ if (curr_wirelen_cost < min_wirelen) {
+ min_wirelen = curr_wirelen_cost;
+ improved = true;
+ }
+
+ // Heuristic to improve placement on the 8k
+ if (improved)
+ n_no_progress = 0;
+ else
+ n_no_progress++;
+
+ if (temp <= 1e-7 && n_no_progress >= (refine ? 1 : 5)) {
+ log_info(" at iteration #%d: temp = %f, timing cost = "
+ "%.0f, wirelen = %.0f \n",
+ iter, temp, double(curr_timing_cost), double(curr_wirelen_cost));
+ break;
+ }
+
+ double Raccept = double(n_accept) / double(n_move);
+
+ int M = std::max(max_x, max_y) + 1;
+
+ if (ctx->verbose)
+ log("iter #%d: temp = %f, timing cost = "
+ "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n",
+ iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept);
+
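+            // The block below implements the adaptive cooling schedule. Worked example
+            // with illustrative numbers: at diameter = 20 and acceptance rate
+            // Raccept = 0.5, the next diameter is int(20 * (1.0 - 0.44 + 0.5) + 0.5) = 21
+            // (clamped to [1, M]), and since 0.15 < Raccept <= 0.8 the temperature is
+            // multiplied by 0.95.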
+ if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) {
+ avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost;
+ } else {
+ double diam_next = diameter * (1.0 - 0.44 + Raccept);
+ diameter = std::max<int>(1, std::min<int>(M, int(diam_next + 0.5)));
+ if (Raccept > 0.96) {
+ temp *= 0.5;
+ } else if (Raccept > 0.8) {
+ temp *= 0.9;
+ } else if (Raccept > 0.15 && diameter > 1) {
+ temp *= 0.95;
+ } else {
+ temp *= 0.8;
+ }
+ }
+ // Once cooled below legalise threshold, run legalisation and start requiring
+ // legal moves only
+ if (diameter < legalise_dia && require_legal) {
+ if (legalise_relative_constraints(ctx)) {
+ // Only increase temperature if something was moved
+ autoplaced.clear();
+ chain_basis.clear();
+ for (auto &cell : ctx->cells) {
+ if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->cluster != ClusterId() &&
+ ctx->getClusterRootCell(cell.second->cluster) == cell.second.get())
+ chain_basis.push_back(cell.second.get());
+ else if (cell.second->belStrength < STRENGTH_STRONG)
+ autoplaced.push_back(cell.second.get());
+ }
+ // temp = post_legalise_temp;
+ // diameter = std::min<int>(M, diameter * post_legalise_dia_scale);
+ ctx->shuffle(autoplaced);
+
+ // Legalisation is a big change so force a slack redistribution here
+ if (cfg.slack_redist_iter > 0 && cfg.budgetBased)
+ assign_budget(ctx, true /* quiet */);
+ }
+ require_legal = false;
+ } else if (cfg.budgetBased && cfg.slack_redist_iter > 0 && iter % cfg.slack_redist_iter == 0) {
+ assign_budget(ctx, true /* quiet */);
+ }
+
+ // Invoke timing analysis to obtain criticalities
+ if (!cfg.budgetBased && cfg.timing_driven)
+ tmg.run();
+ // Need to rebuild costs after criticalities change
+ setup_costs();
+ // Reset incremental bounds
+ moveChange.reset(this);
+ moveChange.new_net_bounds = net_bounds;
+
+ // Recalculate total metric entirely to avoid rounding errors
+ // accumulating over time
+ curr_wirelen_cost = total_wirelen_cost();
+ curr_timing_cost = total_timing_cost();
+ last_wirelen_cost = curr_wirelen_cost;
+ last_timing_cost = curr_timing_cost;
+ // Let the UI show visualization updates.
+ ctx->yield();
+ }
+
+ auto saplace_end = std::chrono::high_resolution_clock::now();
+ log_info("SA placement time %.02fs\n", std::chrono::duration<float>(saplace_end - saplace_start).count());
+
+ // Final post-placement validity check
+ ctx->yield();
+ for (auto bel : ctx->getBels()) {
+ CellInfo *cell = ctx->getBoundBelCell(bel);
+ if (!ctx->isBelLocationValid(bel)) {
+ std::string cell_text = "no cell";
+ if (cell != nullptr)
+ cell_text = std::string("cell '") + ctx->nameOf(cell) + "'";
+ if (ctx->force) {
+ log_warning("post-placement validity check failed for Bel '%s' "
+ "(%s)\n",
+ ctx->nameOfBel(bel), cell_text.c_str());
+ } else {
+ log_error("post-placement validity check failed for Bel '%s' "
+ "(%s)\n",
+ ctx->nameOfBel(bel), cell_text.c_str());
+ }
+ }
+ }
+ timing_analysis(ctx);
+
+ return true;
+ }
+
+ private:
+ // Initial random placement
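+    // If no free bel of the right type is available, a randomly chosen occupant
+    // bound weaker than STRENGTH_STRONG is ripped up and the loop continues by
+    // re-placing that displaced cell; after 25 rip-ups in a row (or if no rip-up
+    // candidate exists) the placer gives up with an error.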
+ void place_initial(CellInfo *cell)
+ {
+ bool all_placed = false;
+ int iters = 25;
+ while (!all_placed) {
+ BelId best_bel = BelId();
+ uint64_t best_score = std::numeric_limits<uint64_t>::max(),
+ best_ripup_score = std::numeric_limits<uint64_t>::max();
+ CellInfo *ripup_target = nullptr;
+ BelId ripup_bel = BelId();
+ if (cell->bel != BelId()) {
+ ctx->unbindBel(cell->bel);
+ }
+ IdString targetType = cell->type;
+
+ auto proc_bel = [&](BelId bel) {
+ if (ctx->isValidBelForCellType(targetType, bel)) {
+ if (ctx->checkBelAvail(bel)) {
+ uint64_t score = ctx->rng64();
+ if (score <= best_score) {
+ best_score = score;
+ best_bel = bel;
+ }
+ } else {
+ uint64_t score = ctx->rng64();
+ CellInfo *bound_cell = ctx->getBoundBelCell(bel);
+ if (score <= best_ripup_score && bound_cell->belStrength < STRENGTH_STRONG) {
+ best_ripup_score = score;
+ ripup_target = bound_cell;
+ ripup_bel = bel;
+ }
+ }
+ }
+ };
+
+ if (cell->region != nullptr && cell->region->constr_bels) {
+ for (auto bel : cell->region->bels) {
+ proc_bel(bel);
+ }
+ } else {
+ for (auto bel : ctx->getBels()) {
+ proc_bel(bel);
+ }
+ }
+
+ if (best_bel == BelId()) {
+ if (iters == 0 || ripup_bel == BelId())
+ log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx));
+ --iters;
+ ctx->unbindBel(ripup_target->bel);
+ best_bel = ripup_bel;
+ } else {
+ ripup_target = nullptr;
+ all_placed = true;
+ }
+ ctx->bindBel(best_bel, cell, STRENGTH_WEAK);
+
+ if (!ctx->isBelLocationValid(best_bel)) {
+ ctx->unbindBel(best_bel);
+ if (ripup_target != nullptr) {
+ ctx->bindBel(best_bel, ripup_target, STRENGTH_WEAK);
+ }
+ all_placed = false;
+ continue;
+ }
+
+ // Back annotate location
+ cell->attrs[ctx->id("BEL")] = ctx->getBelName(cell->bel).str(ctx);
+ cell = ripup_target;
+ }
+ }
+
+ // Attempt a SA position swap, return true on success or false on failure
+ bool try_swap_position(CellInfo *cell, BelId newBel)
+ {
+ static const double epsilon = 1e-20;
+ moveChange.reset(this);
+ if (!require_legal && cell->cluster != ClusterId())
+ return false;
+ BelId oldBel = cell->bel;
+ CellInfo *other_cell = ctx->getBoundBelCell(newBel);
+ if (!require_legal && other_cell != nullptr &&
+ (other_cell->cluster != ClusterId() || other_cell->belStrength > STRENGTH_WEAK)) {
+ return false;
+ }
+ int old_dist = get_constraints_distance(ctx, cell);
+ int new_dist;
+ if (other_cell != nullptr)
+ old_dist += get_constraints_distance(ctx, other_cell);
+ double delta = 0;
+
+ if (!ctx->isValidBelForCellType(cell->type, newBel)) {
+ return false;
+ }
+ if (other_cell != nullptr && !ctx->isValidBelForCellType(other_cell->type, oldBel)) {
+ return false;
+ }
+
+ int net_delta_score = 0;
+ if (cfg.netShareWeight > 0)
+ net_delta_score += update_nets_by_tile(cell, ctx->getBelLocation(cell->bel), ctx->getBelLocation(newBel));
+
+ ctx->unbindBel(oldBel);
+ if (other_cell != nullptr) {
+ ctx->unbindBel(newBel);
+ }
+
+ ctx->bindBel(newBel, cell, STRENGTH_WEAK);
+
+ if (other_cell != nullptr) {
+ ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK);
+ if (cfg.netShareWeight > 0)
+ net_delta_score +=
+ update_nets_by_tile(other_cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel));
+ }
+
+ add_move_cell(moveChange, cell, oldBel);
+
+ if (other_cell != nullptr) {
+ add_move_cell(moveChange, other_cell, newBel);
+ }
+
+        // Always check both the new and old locations, because in some cases involving dedicated routing,
+        // ripping up a cell can deny use of a dedicated path and thus make the site illegal
+ if (!ctx->isBelLocationValid(newBel) || !ctx->isBelLocationValid(oldBel)) {
+ ctx->unbindBel(newBel);
+ if (other_cell != nullptr)
+ ctx->unbindBel(oldBel);
+ goto swap_fail;
+ }
+
+ // Recalculate metrics for all nets touched by the perturbation
+ compute_cost_changes(moveChange);
+
+ new_dist = get_constraints_distance(ctx, cell);
+ if (other_cell != nullptr)
+ new_dist += get_constraints_distance(ctx, other_cell);
+ delta = lambda * (moveChange.timing_delta / std::max<double>(last_timing_cost, epsilon)) +
+ (1 - lambda) * (double(moveChange.wirelen_delta) / std::max<double>(last_wirelen_cost, epsilon));
+ delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost;
+ if (cfg.netShareWeight > 0)
+ delta += -cfg.netShareWeight * (net_delta_score / std::max<double>(total_net_share, epsilon));
+ n_move++;
+ // SA acceptance criteria
+ if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) {
+ n_accept++;
+ } else {
+ if (other_cell != nullptr)
+ ctx->unbindBel(oldBel);
+ ctx->unbindBel(newBel);
+ goto swap_fail;
+ }
+ commit_cost_changes(moveChange);
+#if 0
+ log_info("swap %s -> %s\n", cell->name.c_str(ctx), ctx->nameOfBel(newBel));
+ if (other_cell != nullptr)
+ log_info("swap %s -> %s\n", other_cell->name.c_str(ctx), ctx->nameOfBel(oldBel));
+#endif
+ return true;
+ swap_fail:
+ ctx->bindBel(oldBel, cell, STRENGTH_WEAK);
+ if (other_cell != nullptr) {
+ ctx->bindBel(newBel, other_cell, STRENGTH_WEAK);
+ if (cfg.netShareWeight > 0)
+ update_nets_by_tile(other_cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel));
+ }
+ if (cfg.netShareWeight > 0)
+ update_nets_by_tile(cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel));
+ return false;
+ }
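+
+    // Worked example of the acceptance test above, with illustrative numbers: a
+    // move with normalised cost change delta = 0.02 at temperature temp = 0.1 is
+    // accepted with probability exp(-0.02 / 0.1) = exp(-0.2) ~= 0.82, i.e. whenever
+    // the pseudo-random draw (scaled to roughly [0, 1]) is <= 0.82; any move with
+    // delta < 0 is always kept.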
+
+ // Swap the Bel of a cell with another, return the original location
+ BelId swap_cell_bels(CellInfo *cell, BelId newBel)
+ {
+ BelId oldBel = cell->bel;
+#if 0
+ log_info("%s old: %s new: %s\n", cell->name.c_str(ctx), ctx->nameOfBel(cell->bel), ctx->nameOfBel(newBel));
+#endif
+ CellInfo *bound = ctx->getBoundBelCell(newBel);
+ if (bound != nullptr)
+ ctx->unbindBel(newBel);
+ ctx->unbindBel(oldBel);
+ ctx->bindBel(newBel, cell, (cell->cluster != ClusterId()) ? STRENGTH_STRONG : STRENGTH_WEAK);
+ if (bound != nullptr) {
+ ctx->bindBel(oldBel, bound, (bound->cluster != ClusterId()) ? STRENGTH_STRONG : STRENGTH_WEAK);
+ if (cfg.netShareWeight > 0)
+ update_nets_by_tile(bound, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel));
+ }
+ if (cfg.netShareWeight > 0)
+ update_nets_by_tile(cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel));
+ return oldBel;
+ }
+
+ // Attempt to swap a chain with a non-chain
+ bool try_swap_chain(CellInfo *cell, BelId newBase)
+ {
+ std::vector<std::pair<CellInfo *, Loc>> cell_rel;
+ dict<IdString, BelId> moved_cells;
+ double delta = 0;
+ int orig_share_cost = total_net_share;
+ moveChange.reset(this);
+#if CHAIN_DEBUG
+ log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx));
+#endif
+ std::queue<std::pair<ClusterId, BelId>> displaced_clusters;
+ displaced_clusters.emplace(cell->cluster, newBase);
+ while (!displaced_clusters.empty()) {
+ std::vector<std::pair<CellInfo *, BelId>> dest_bels;
+ auto cursor = displaced_clusters.front();
+#if CHAIN_DEBUG
+ log_info("%d Cluster %s\n", __LINE__, cursor.first.c_str(ctx));
+#endif
+ displaced_clusters.pop();
+ if (!ctx->getClusterPlacement(cursor.first, cursor.second, dest_bels))
+ goto swap_fail;
+ for (const auto &db : dest_bels) {
+ // Ensure the cluster is ripped up
+ if (db.first->bel != BelId()) {
+ moved_cells[db.first->name] = db.first->bel;
+#if CHAIN_DEBUG
+ log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(db.first->bel));
+#endif
+ ctx->unbindBel(db.first->bel);
+ }
+ }
+ for (const auto &db : dest_bels) {
+ CellInfo *bound = ctx->getBoundBelCell(db.second);
+ BelId old_bel = moved_cells.at(db.first->name);
+ if (!ctx->checkBelAvail(old_bel) && bound != nullptr) {
+ // Simple swap no longer possible
+ goto swap_fail;
+ }
+ if (bound != nullptr) {
+ if (moved_cells.count(bound->name)) {
+ // Don't move a cell multiple times in the same go
+ goto swap_fail;
+ } else if (bound->belStrength > STRENGTH_STRONG) {
+ goto swap_fail;
+ } else if (bound->cluster != ClusterId()) {
+ // Displace the entire cluster
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ Loc bound_loc = ctx->getBelLocation(bound->bel);
+ Loc root_loc = ctx->getBelLocation(ctx->getClusterRootCell(bound->cluster)->bel);
+ BelId new_root = ctx->getBelByLocation(Loc(old_loc.x + (root_loc.x - bound_loc.x),
+ old_loc.y + (root_loc.y - bound_loc.y),
+ old_loc.z + (root_loc.z - bound_loc.z)));
+ if (new_root == BelId())
+ goto swap_fail;
+ for (auto cluster_cell : cluster2cell.at(bound->cluster)) {
+ moved_cells[cluster_cell->name] = cluster_cell->bel;
+#if CHAIN_DEBUG
+ log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(cluster_cell->bel));
+#endif
+ ctx->unbindBel(cluster_cell->bel);
+ }
+ displaced_clusters.emplace(bound->cluster, new_root);
+ } else {
+ // Just a single cell to move
+ moved_cells[bound->name] = bound->bel;
+#if CHAIN_DEBUG
+ log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(bound->bel));
+ log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(old_bel), ctx->nameOf(bound));
+#endif
+ ctx->unbindBel(bound->bel);
+ ctx->bindBel(old_bel, bound, STRENGTH_WEAK);
+ }
+ } else if (!ctx->checkBelAvail(db.second)) {
+ goto swap_fail;
+ }
+ // All those shenanigans should now mean the target bel is free to use
+#if CHAIN_DEBUG
+ log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(db.second), ctx->nameOf(db.first));
+#endif
+ ctx->bindBel(db.second, db.first, STRENGTH_WEAK);
+ }
+ }
+
+ for (const auto &mm : moved_cells) {
+ CellInfo *cell = ctx->cells.at(mm.first).get();
+ add_move_cell(moveChange, cell, moved_cells.at(cell->name));
+ if (cfg.netShareWeight > 0)
+ update_nets_by_tile(cell, ctx->getBelLocation(moved_cells.at(cell->name)),
+ ctx->getBelLocation(cell->bel));
+ if (!ctx->isBelLocationValid(cell->bel) || !cell->testRegion(cell->bel))
+ goto swap_fail;
+ }
+#if CHAIN_DEBUG
+ log_info("legal chain swap %s\n", cell->name.c_str(ctx));
+#endif
+ compute_cost_changes(moveChange);
+ delta = lambda * (moveChange.timing_delta / last_timing_cost) +
+ (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost);
+ if (cfg.netShareWeight > 0) {
+ delta +=
+ cfg.netShareWeight * (orig_share_cost - total_net_share) / std::max<double>(total_net_share, 1e-20);
+ }
+ n_move++;
+ // SA acceptance criteria
+ if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) {
+ n_accept++;
+#if CHAIN_DEBUG
+ log_info("accepted chain swap %s\n", cell->name.c_str(ctx));
+#endif
+ } else {
+ goto swap_fail;
+ }
+ commit_cost_changes(moveChange);
+ return true;
+ swap_fail:
+#if CHAIN_DEBUG
+ log_info("Swap failed\n");
+#endif
+ for (auto cell_pair : moved_cells) {
+ CellInfo *cell = ctx->cells.at(cell_pair.first).get();
+ if (cell->bel != BelId()) {
+#if CHAIN_DEBUG
+ log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(cell->bel));
+#endif
+ ctx->unbindBel(cell->bel);
+ }
+ }
+ for (auto cell_pair : moved_cells) {
+ CellInfo *cell = ctx->cells.at(cell_pair.first).get();
+#if CHAIN_DEBUG
+ log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(cell_pair.second), cell->name.c_str(ctx));
+#endif
+ ctx->bindBel(cell_pair.second, cell, STRENGTH_WEAK);
+ }
+ return false;
+ }
+
+ // Find a random Bel of the correct type for a cell, within the specified
+ // diameter
+ BelId random_bel_for_cell(CellInfo *cell, int force_z = -1)
+ {
+ IdString targetType = cell->type;
+ Loc curr_loc = ctx->getBelLocation(cell->bel);
+ int count = 0;
+
+ int dx = diameter, dy = diameter;
+ if (cell->region != nullptr && cell->region->constr_bels) {
+ dx = std::min(cfg.hpwl_scale_x * diameter,
+ (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1);
+ dy = std::min(cfg.hpwl_scale_y * diameter,
+ (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1);
+ // Clamp location to within bounds
+ curr_loc.x = std::max(region_bounds[cell->region->name].x0, curr_loc.x);
+ curr_loc.x = std::min(region_bounds[cell->region->name].x1, curr_loc.x);
+ curr_loc.y = std::max(region_bounds[cell->region->name].y0, curr_loc.y);
+ curr_loc.y = std::min(region_bounds[cell->region->name].y1, curr_loc.y);
+ }
+
+ FastBels::FastBelsData *bel_data;
+ auto type_cnt = fast_bels.getBelsForCellType(targetType, &bel_data);
+
+ while (true) {
+ int nx = ctx->rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0);
+ int ny = ctx->rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0);
+ if (cfg.minBelsForGridPick >= 0 && type_cnt < cfg.minBelsForGridPick)
+ nx = ny = 0;
+ if (nx >= int(bel_data->size()))
+ continue;
+ if (ny >= int(bel_data->at(nx).size()))
+ continue;
+ const auto &fb = bel_data->at(nx).at(ny);
+ if (fb.size() == 0)
+ continue;
+ BelId bel = fb.at(ctx->rng(int(fb.size())));
+ if (force_z != -1) {
+ Loc loc = ctx->getBelLocation(bel);
+ if (loc.z != force_z)
+ continue;
+ }
+ if (!cell->testRegion(bel))
+ continue;
+ if (locked_bels.find(bel) != locked_bels.end())
+ continue;
+ count++;
+ return bel;
+ }
+ }
+
+ // Return true if a net is to be entirely ignored
+ inline bool ignore_net(NetInfo *net)
+ {
+ return net->driver.cell == nullptr || net->driver.cell->bel == BelId() ||
+ ctx->getBelGlobalBuf(net->driver.cell->bel);
+ }
+
+ // Get the bounding box for a net
+ inline BoundingBox get_net_bounds(NetInfo *net)
+ {
+ BoundingBox bb;
+ NPNR_ASSERT(net->driver.cell != nullptr);
+ Loc dloc = ctx->getBelLocation(net->driver.cell->bel);
+ bb.x0 = dloc.x;
+ bb.x1 = dloc.x;
+ bb.y0 = dloc.y;
+ bb.y1 = dloc.y;
+ bb.nx0 = 1;
+ bb.nx1 = 1;
+ bb.ny0 = 1;
+ bb.ny1 = 1;
+ for (auto user : net->users) {
+ if (user.cell->bel == BelId())
+ continue;
+ Loc uloc = ctx->getBelLocation(user.cell->bel);
+ if (bb.x0 == uloc.x)
+ ++bb.nx0;
+ else if (uloc.x < bb.x0) {
+ bb.x0 = uloc.x;
+ bb.nx0 = 1;
+ }
+ if (bb.x1 == uloc.x)
+ ++bb.nx1;
+ else if (uloc.x > bb.x1) {
+ bb.x1 = uloc.x;
+ bb.nx1 = 1;
+ }
+ if (bb.y0 == uloc.y)
+ ++bb.ny0;
+ else if (uloc.y < bb.y0) {
+ bb.y0 = uloc.y;
+ bb.ny0 = 1;
+ }
+ if (bb.y1 == uloc.y)
+ ++bb.ny1;
+ else if (uloc.y > bb.y1) {
+ bb.y1 = uloc.y;
+ bb.ny1 = 1;
+ }
+ }
+
+ return bb;
+ }
+
+ // Get the timing cost for an arc of a net
+ inline double get_timing_cost(NetInfo *net, const PortRef &user)
+ {
+ int cc;
+ if (net->driver.cell == nullptr)
+ return 0;
+ if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE)
+ return 0;
+ if (cfg.budgetBased) {
+ double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user));
+ return std::min(10.0, std::exp(delay - ctx->getDelayNS(user.budget) / 10));
+ } else {
+ float crit = tmg.get_criticality(CellPortKey(user));
+ double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user));
+ return delay * std::pow(crit, crit_exp);
+ }
+ }
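+
+    // Worked example with illustrative numbers: in the criticality-driven branch,
+    // an arc with predicted delay 2.0 ns and criticality 0.9 costs
+    // 2.0 * 0.9^8 ~= 0.86, while the same arc at criticality 0.5 costs only
+    // 2.0 * 0.5^8 ~= 0.008 (crit_exp is 8), so near-critical arcs dominate the
+    // timing cost.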
+
+ // Set up the cost maps
+ void setup_costs()
+ {
+ for (auto &net : ctx->nets) {
+ NetInfo *ni = net.second.get();
+ if (ignore_net(ni))
+ continue;
+ net_bounds[ni->udata] = get_net_bounds(ni);
+ if (cfg.timing_driven && int(ni->users.entries()) < cfg.timingFanoutThresh)
+ for (auto usr : ni->users.enumerate())
+ net_arc_tcost[ni->udata][usr.index.idx()] = get_timing_cost(ni, usr.value);
+ }
+ }
+
+ // Get the total wiring cost for the design
+ wirelen_t total_wirelen_cost()
+ {
+ wirelen_t cost = 0;
+ for (const auto &net : net_bounds)
+ cost += net.hpwl(cfg);
+ return cost;
+ }
+
+ // Get the total timing cost for the design
+ double total_timing_cost()
+ {
+ double cost = 0;
+ for (const auto &net : net_arc_tcost) {
+ for (auto arc_cost : net) {
+ cost += arc_cost;
+ }
+ }
+ return cost;
+ }
+
+ // Cost-change-related data for a move
+ struct MoveChangeData
+ {
+
+ enum BoundChangeType
+ {
+ NO_CHANGE,
+ CELL_MOVED_INWARDS,
+ CELL_MOVED_OUTWARDS,
+ FULL_RECOMPUTE
+ };
+
+ std::vector<decltype(NetInfo::udata)> bounds_changed_nets_x, bounds_changed_nets_y;
+ std::vector<std::pair<decltype(NetInfo::udata), store_index<PortRef>>> changed_arcs;
+
+ std::vector<BoundChangeType> already_bounds_changed_x, already_bounds_changed_y;
+ std::vector<std::vector<bool>> already_changed_arcs;
+
+ std::vector<BoundingBox> new_net_bounds;
+ std::vector<std::pair<std::pair<decltype(NetInfo::udata), store_index<PortRef>>, double>> new_arc_costs;
+
+ wirelen_t wirelen_delta = 0;
+ double timing_delta = 0;
+
+ void init(SAPlacer *p)
+ {
+ already_bounds_changed_x.resize(p->ctx->nets.size());
+ already_bounds_changed_y.resize(p->ctx->nets.size());
+ already_changed_arcs.resize(p->ctx->nets.size());
+ for (auto &net : p->ctx->nets) {
+ already_changed_arcs.at(net.second->udata).resize(net.second->users.capacity());
+ }
+ new_net_bounds = p->net_bounds;
+ }
+
+ void reset(SAPlacer *p)
+ {
+ for (auto bc : bounds_changed_nets_x) {
+ new_net_bounds[bc] = p->net_bounds[bc];
+ already_bounds_changed_x[bc] = NO_CHANGE;
+ }
+ for (auto bc : bounds_changed_nets_y) {
+ new_net_bounds[bc] = p->net_bounds[bc];
+ already_bounds_changed_y[bc] = NO_CHANGE;
+ }
+ for (const auto &tc : changed_arcs)
+ already_changed_arcs[tc.first][tc.second.idx()] = false;
+ bounds_changed_nets_x.clear();
+ bounds_changed_nets_y.clear();
+ changed_arcs.clear();
+ new_arc_costs.clear();
+ wirelen_delta = 0;
+ timing_delta = 0;
+ }
+
+ } moveChange;
+
+ void add_move_cell(MoveChangeData &mc, CellInfo *cell, BelId old_bel)
+ {
+ Loc curr_loc = ctx->getBelLocation(cell->bel);
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ // Check net bounds
+ for (const auto &port : cell->ports) {
+ NetInfo *pn = port.second.net;
+ if (pn == nullptr)
+ continue;
+ if (ignore_net(pn))
+ continue;
+ BoundingBox &curr_bounds = mc.new_net_bounds[pn->udata];
+ // Incremental bounding box updates
+            // Note that everything other than full updates is applied immediately rather than being queued,
+ // so further updates to the same net in the same move are dealt with correctly.
+ // If a full update is already queued, this can be considered a no-op
+ if (mc.already_bounds_changed_x[pn->udata] != MoveChangeData::FULL_RECOMPUTE) {
+ // Bounds x0
+ if (curr_loc.x < curr_bounds.x0) {
+ // Further out than current bounds x0
+ curr_bounds.x0 = curr_loc.x;
+ curr_bounds.nx0 = 1;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) {
+ // Checking already_bounds_changed_x ensures that each net is only added once
+ // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the
+ // overall cost change
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ }
+ } else if (curr_loc.x == curr_bounds.x0 && old_loc.x > curr_bounds.x0) {
+ curr_bounds.nx0++;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ }
+ } else if (old_loc.x == curr_bounds.x0 && curr_loc.x > curr_bounds.x0) {
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ if (curr_bounds.nx0 == 1) {
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE;
+ } else {
+ curr_bounds.nx0--;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS;
+ }
+ }
+
+ // Bounds x1
+ if (curr_loc.x > curr_bounds.x1) {
+ // Further out than current bounds x1
+ curr_bounds.x1 = curr_loc.x;
+ curr_bounds.nx1 = 1;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) {
+ // Checking already_bounds_changed_x ensures that each net is only added once
+ // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the
+ // overall cost change
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ }
+ } else if (curr_loc.x == curr_bounds.x1 && old_loc.x < curr_bounds.x1) {
+ curr_bounds.nx1++;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ }
+ } else if (old_loc.x == curr_bounds.x1 && curr_loc.x < curr_bounds.x1) {
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.bounds_changed_nets_x.push_back(pn->udata);
+ if (curr_bounds.nx1 == 1) {
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE;
+ } else {
+ curr_bounds.nx1--;
+ if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS;
+ }
+ }
+ }
+ if (mc.already_bounds_changed_y[pn->udata] != MoveChangeData::FULL_RECOMPUTE) {
+ // Bounds y0
+ if (curr_loc.y < curr_bounds.y0) {
+ // Further out than current bounds y0
+ curr_bounds.y0 = curr_loc.y;
+ curr_bounds.ny0 = 1;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ }
+ } else if (curr_loc.y == curr_bounds.y0 && old_loc.y > curr_bounds.y0) {
+ curr_bounds.ny0++;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ }
+ } else if (old_loc.y == curr_bounds.y0 && curr_loc.y > curr_bounds.y0) {
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ if (curr_bounds.ny0 == 1) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE;
+ } else {
+ curr_bounds.ny0--;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS;
+ }
+ }
+
+ // Bounds y1
+ if (curr_loc.y > curr_bounds.y1) {
+ // Further out than current bounds y1
+ curr_bounds.y1 = curr_loc.y;
+ curr_bounds.ny1 = 1;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ }
+ } else if (curr_loc.y == curr_bounds.y1 && old_loc.y < curr_bounds.y1) {
+ curr_bounds.ny1++;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS;
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ }
+ } else if (old_loc.y == curr_bounds.y1 && curr_loc.y < curr_bounds.y1) {
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.bounds_changed_nets_y.push_back(pn->udata);
+ if (curr_bounds.ny1 == 1) {
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE;
+ } else {
+ curr_bounds.ny1--;
+ if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE)
+ mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS;
+ }
+ }
+ }
+
+ if (cfg.timing_driven && int(pn->users.entries()) < cfg.timingFanoutThresh) {
+ // Output ports - all arcs change timing
+ if (port.second.type == PORT_OUT) {
+ int cc;
+ TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
+ if (cls != TMG_IGNORE)
+ for (auto usr : pn->users.enumerate())
+ if (!mc.already_changed_arcs[pn->udata][usr.index.idx()]) {
+ mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr.index));
+ mc.already_changed_arcs[pn->udata][usr.index.idx()] = true;
+ }
+ } else if (port.second.type == PORT_IN) {
+ auto usr_idx = port.second.user_idx;
+ if (!mc.already_changed_arcs[pn->udata][usr_idx.idx()]) {
+ mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr_idx));
+ mc.already_changed_arcs[pn->udata][usr_idx.idx()] = true;
+ }
+ }
+ }
+ }
+ }
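+
+    // Worked example with illustrative numbers: if a net's bounding box has x0 = 4
+    // with nx0 = 1 and the only cell at x = 4 moves to x = 9, the new x0 cannot be
+    // derived incrementally (it depends on all remaining pins), so the net is
+    // flagged FULL_RECOMPUTE and rebuilt in compute_cost_changes(); if nx0 had been
+    // 2, the edge stays at x0 = 4 and only the counter drops to 1.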
+
+ void compute_cost_changes(MoveChangeData &md)
+ {
+ for (const auto &bc : md.bounds_changed_nets_x) {
+ if (md.already_bounds_changed_x[bc] == MoveChangeData::FULL_RECOMPUTE)
+ md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]);
+ }
+ for (const auto &bc : md.bounds_changed_nets_y) {
+ if (md.already_bounds_changed_x[bc] != MoveChangeData::FULL_RECOMPUTE &&
+ md.already_bounds_changed_y[bc] == MoveChangeData::FULL_RECOMPUTE)
+ md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]);
+ }
+
+ for (const auto &bc : md.bounds_changed_nets_x)
+ md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg);
+ for (const auto &bc : md.bounds_changed_nets_y)
+ if (md.already_bounds_changed_x[bc] == MoveChangeData::NO_CHANGE)
+ md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg);
+
+ if (cfg.timing_driven) {
+ for (const auto &tc : md.changed_arcs) {
+ double old_cost = net_arc_tcost.at(tc.first).at(tc.second.idx());
+ double new_cost =
+ get_timing_cost(net_by_udata.at(tc.first), net_by_udata.at(tc.first)->users.at(tc.second));
+ md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost));
+ md.timing_delta += (new_cost - old_cost);
+ md.already_changed_arcs[tc.first][tc.second.idx()] = false;
+ }
+ }
+ }
+
+ void commit_cost_changes(MoveChangeData &md)
+ {
+ for (const auto &bc : md.bounds_changed_nets_x)
+ net_bounds[bc] = md.new_net_bounds[bc];
+ for (const auto &bc : md.bounds_changed_nets_y)
+ net_bounds[bc] = md.new_net_bounds[bc];
+ for (const auto &tc : md.new_arc_costs)
+ net_arc_tcost[tc.first.first].at(tc.first.second.idx()) = tc.second;
+ curr_wirelen_cost += md.wirelen_delta;
+ curr_timing_cost += md.timing_delta;
+ }
+
+    // Simple routability-driven placement
+ const int large_cell_thresh = 50;
+ int total_net_share = 0;
+ std::vector<std::vector<dict<IdString, int>>> nets_by_tile;
+ void setup_nets_by_tile()
+ {
+ total_net_share = 0;
+ nets_by_tile.resize(max_x + 1, std::vector<dict<IdString, int>>(max_y + 1));
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (int(ci->ports.size()) > large_cell_thresh)
+ continue;
+ Loc loc = ctx->getBelLocation(ci->bel);
+ auto &nbt = nets_by_tile.at(loc.x).at(loc.y);
+ for (const auto &port : ci->ports) {
+ if (port.second.net == nullptr)
+ continue;
+ if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel))
+ continue;
+ int &s = nbt[port.second.net->name];
+ if (s > 0)
+ ++total_net_share;
+ ++s;
+ }
+ }
+ }
+
+ int update_nets_by_tile(CellInfo *ci, Loc old_loc, Loc new_loc)
+ {
+ if (int(ci->ports.size()) > large_cell_thresh)
+ return 0;
+ int loss = 0, gain = 0;
+ auto &nbt_old = nets_by_tile.at(old_loc.x).at(old_loc.y);
+ auto &nbt_new = nets_by_tile.at(new_loc.x).at(new_loc.y);
+
+ for (const auto &port : ci->ports) {
+ if (port.second.net == nullptr)
+ continue;
+ if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel))
+ continue;
+ int &o = nbt_old[port.second.net->name];
+ --o;
+ NPNR_ASSERT(o >= 0);
+ if (o > 0)
+ ++loss;
+ int &n = nbt_new[port.second.net->name];
+ if (n > 0)
+ ++gain;
+ ++n;
+ }
+ int delta = gain - loss;
+ total_net_share += delta;
+ return delta;
+ }
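+
+    // Worked example (hypothetical situation): if cells A and B in one tile both
+    // connect to net N, that tile contributes 1 to total_net_share. Moving B away
+    // removes that contribution (loss = 1) and adds a new one (gain = 1) only if
+    // the destination tile already has a cell on N, so delta = gain - loss rewards
+    // keeping connected cells in the same tile.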
+
+ // Get the combined wirelen/timing metric
+ inline double curr_metric()
+ {
+ return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost - cfg.netShareWeight * total_net_share;
+ }
+
+    // Map nets to their bounding box (so we can skip recomputation for moves that do not exceed the bounds)
+ std::vector<BoundingBox> net_bounds;
+ // Map net arcs to their timing cost (criticality * delay ns)
+ std::vector<std::vector<double>> net_arc_tcost;
+
+    // Fast lookup from cluster to its member cells
+ dict<ClusterId, std::vector<CellInfo *>> cluster2cell;
+
+ // Wirelength and timing cost at last and current iteration
+ wirelen_t last_wirelen_cost, curr_wirelen_cost;
+ double last_timing_cost, curr_timing_cost;
+
+ Context *ctx;
+ float temp = 10;
+ float crit_exp = 8;
+ float lambda = 0.5;
+ bool improved = false;
+ int n_move, n_accept;
+ int diameter = 35, max_x = 1, max_y = 1;
+ dict<IdString, std::tuple<int, int>> bel_types;
+ dict<IdString, BoundingBox> region_bounds;
+ FastBels fast_bels;
+ pool<BelId> locked_bels;
+ std::vector<NetInfo *> net_by_udata;
+ std::vector<decltype(NetInfo::udata)> old_udata;
+ bool require_legal = true;
+ const int legalise_dia = 4;
+ Placer1Cfg cfg;
+
+ TimingAnalyser tmg;
+};
+
+Placer1Cfg::Placer1Cfg(Context *ctx)
+{
+ constraintWeight = ctx->setting<float>("placer1/constraintWeight", 10);
+ netShareWeight = ctx->setting<float>("placer1/netShareWeight", 0);
+ minBelsForGridPick = ctx->setting<int>("placer1/minBelsForGridPick", 64);
+ budgetBased = ctx->setting<bool>("placer1/budgetBased", false);
+ startTemp = ctx->setting<float>("placer1/startTemp", 1);
+ timingFanoutThresh = std::numeric_limits<int>::max();
+ timing_driven = ctx->setting<bool>("timing_driven");
+ slack_redist_iter = ctx->setting<int>("slack_redist_iter");
+ hpwl_scale_x = 1;
+ hpwl_scale_y = 1;
+}
+
+bool placer1(Context *ctx, Placer1Cfg cfg)
+{
+ try {
+ SAPlacer placer(ctx, cfg);
+ placer.place();
+ log_info("Checksum: 0x%08x\n", ctx->checksum());
+#ifndef NDEBUG
+ ctx->lock();
+ ctx->check();
+ ctx->unlock();
+#endif
+ return true;
+ } catch (log_execution_error_exception) {
+#ifndef NDEBUG
+ ctx->lock();
+ ctx->check();
+ ctx->unlock();
+#endif
+ return false;
+ }
+}
+
+bool placer1_refine(Context *ctx, Placer1Cfg cfg)
+{
+ try {
+ SAPlacer placer(ctx, cfg);
+ placer.place(true);
+ log_info("Checksum: 0x%08x\n", ctx->checksum());
+#ifndef NDEBUG
+ ctx->lock();
+ ctx->check();
+ ctx->unlock();
+#endif
+ return true;
+ } catch (log_execution_error_exception) {
+#ifndef NDEBUG
+ ctx->lock();
+ ctx->check();
+ ctx->unlock();
+#endif
+ return false;
+ }
+}
+
+NEXTPNR_NAMESPACE_END
diff --git a/common/place/placer1.h b/common/place/placer1.h
new file mode 100644
index 00000000..9dfb0b0d
--- /dev/null
+++ b/common/place/placer1.h
@@ -0,0 +1,45 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 Claire Xenia Wolf <claire@yosyshq.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+#ifndef PLACE_H
+#define PLACE_H
+
+#include "log.h"
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct Placer1Cfg
+{
+ Placer1Cfg(Context *ctx);
+ float constraintWeight, netShareWeight;
+ int minBelsForGridPick;
+ bool budgetBased;
+ float startTemp;
+ int timingFanoutThresh;
+ bool timing_driven;
+ int slack_redist_iter;
+ int hpwl_scale_x, hpwl_scale_y;
+};
+
+extern bool placer1(Context *ctx, Placer1Cfg cfg);
+extern bool placer1_refine(Context *ctx, Placer1Cfg cfg);
+
+NEXTPNR_NAMESPACE_END
+
+#endif // PLACE_H
diff --git a/common/place/placer_heap.cc b/common/place/placer_heap.cc
new file mode 100644
index 00000000..4c9ffb23
--- /dev/null
+++ b/common/place/placer_heap.cc
@@ -0,0 +1,1830 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2019 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * [[cite]] HeAP
+ * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson
+ * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf
+ *
+ * [[cite]] SimPL
+ * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov
+ * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf
+ *
+ * Notable changes from the original algorithm
+ * - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is
+ * added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation)
+ * as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by
+ * spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices.
+ * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2)
+ */
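+
+// Worked example of the timing weighting above, with illustrative numbers: an arc
+// with criticality 0.9 has its bound2bound weight scaled by 1 + 10 * 0.9^2 = 9.1,
+// whereas a non-critical arc (crit = 0) keeps weight 1, so timing-critical cells
+// are pulled together more strongly in the solved placement.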
+
+#ifdef WITH_HEAP
+
+#include "placer_heap.h"
+#include <Eigen/Core>
+#include <Eigen/IterativeLinearSolvers>
+#include <boost/optional.hpp>
+#include <chrono>
+#include <deque>
+#include <fstream>
+#include <numeric>
+#include <queue>
+#include <tuple>
+#include "fast_bels.h"
+#include "log.h"
+#include "nextpnr.h"
+#include "parallel_refine.h"
+#include "place_common.h"
+#include "placer1.h"
+#include "scope_lock.h"
+#include "timing.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+namespace {
+// A simple internal representation for a sparse system of equations Ax = rhs
+// This is designed to decouple the functions that build the matrix from the engine that
+// solves it, and from the internal representation that the solver requires
+template <typename T> struct EquationSystem
+{
+
+ EquationSystem(size_t rows, size_t cols)
+ {
+ A.resize(cols);
+ rhs.resize(rows);
+ }
+
+ // Simple sparse format, easy to convert to CCS for solver
+ std::vector<std::vector<std::pair<int, T>>> A; // col -> (row, x[row, col]) sorted by row
+ std::vector<T> rhs; // RHS vector
+ void reset()
+ {
+ for (auto &col : A)
+ col.clear();
+ std::fill(rhs.begin(), rhs.end(), T());
+ }
+
+ void add_coeff(int row, int col, T val)
+ {
+ auto &Ac = A.at(col);
+ // Binary search
+ int b = 0, e = int(Ac.size()) - 1;
+ while (b <= e) {
+ int i = (b + e) / 2;
+ if (Ac.at(i).first == row) {
+ Ac.at(i).second += val;
+ return;
+ }
+ if (Ac.at(i).first > row)
+ e = i - 1;
+ else
+ b = i + 1;
+ }
+ Ac.insert(Ac.begin() + b, std::make_pair(row, val));
+ }
+
+ void add_rhs(int row, T val) { rhs[row] += val; }
+
+ void solve(std::vector<T> &x, float tolerance)
+ {
+ using namespace Eigen;
+ if (x.empty())
+ return;
+ NPNR_ASSERT(x.size() == A.size());
+
+ VectorXd vx(x.size()), vb(rhs.size());
+ SparseMatrix<T> mat(A.size(), A.size());
+
+ std::vector<int> colnnz;
+ for (auto &Ac : A)
+ colnnz.push_back(int(Ac.size()));
+ mat.reserve(colnnz);
+ for (int col = 0; col < int(A.size()); col++) {
+ auto &Ac = A.at(col);
+ for (auto &el : Ac)
+ mat.insert(el.first, col) = el.second;
+ }
+
+ for (int i = 0; i < int(x.size()); i++)
+ vx[i] = x.at(i);
+ for (int i = 0; i < int(rhs.size()); i++)
+ vb[i] = rhs.at(i);
+
+ ConjugateGradient<SparseMatrix<T>, Lower | Upper> solver;
+ solver.setTolerance(tolerance);
+ VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx);
+ for (int i = 0; i < int(x.size()); i++)
+ x.at(i) = xr[i];
+ // for (int i = 0; i < int(x.size()); i++)
+ // log_info("x[%d] = %f\n", i, x.at(i));
+ }
+};
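+
+// Usage sketch for EquationSystem, with illustrative values: solving the 2x2
+// diagonal system 2*x0 = 6, 4*x1 = 8.
+//   EquationSystem<double> es(2, 2);
+//   es.add_coeff(0, 0, 2.0);  es.add_coeff(1, 1, 4.0);
+//   es.add_rhs(0, 6.0);       es.add_rhs(1, 8.0);
+//   std::vector<double> x = {0.0, 0.0};  // initial guess
+//   es.solve(x, 1e-5);                   // x converges to approximately {3.0, 2.0}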
+
+} // namespace
+
+class HeAPPlacer
+{
+ public:
+ HeAPPlacer(Context *ctx, PlacerHeapCfg cfg)
+ : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx)
+ {
+ Eigen::initParallel();
+ tmg.setup_only = true;
+ tmg.setup();
+
+ for (auto &cell : ctx->cells)
+ if (cell.second->cluster != ClusterId())
+ cluster2cells[cell.second->cluster].push_back(cell.second.get());
+ }
+
+ bool place()
+ {
+ auto startt = std::chrono::high_resolution_clock::now();
+
+ ScopeLock<Context> lock(ctx);
+ place_constraints();
+ build_fast_bels();
+ seed_placement();
+ update_all_chains();
+ wirelen_t hpwl = total_hpwl();
+ log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n",
+ int(place_cells.size()), int(hpwl));
+ for (int i = 0; i < 4; i++) {
+ setup_solve_cells();
+ auto solve_startt = std::chrono::high_resolution_clock::now();
+#ifdef NPNR_DISABLE_THREADS
+ build_solve_direction(false, -1);
+ build_solve_direction(true, -1);
+#else
+ boost::thread xaxis([&]() { build_solve_direction(false, -1); });
+ build_solve_direction(true, -1);
+ xaxis.join();
+#endif
+ auto solve_endt = std::chrono::high_resolution_clock::now();
+ solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count();
+
+ update_all_chains();
+
+ hpwl = total_hpwl();
+ log_info(" at initial placer iter %d, wirelen = %d\n", i, int(hpwl));
+ }
+
+ wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits<wirelen_t>::max();
+ int iter = 0, stalled = 0;
+
+ std::vector<std::tuple<CellInfo *, BelId, PlaceStrength>> solution;
+
+ std::vector<pool<BelBucketId>> heap_runs;
+ pool<BelBucketId> all_buckets;
+ dict<BelBucketId, int> bucket_count;
+
+ for (auto cell : place_cells) {
+ BelBucketId bucket = ctx->getBelBucketForCellType(cell->type);
+ if (!all_buckets.count(bucket)) {
+ heap_runs.push_back(pool<BelBucketId>{bucket});
+ all_buckets.insert(bucket);
+ }
+ bucket_count[bucket]++;
+ }
+        // If more than 98% of cells belong to a single cell type, always solve all cells at once.
+        // Otherwise, follow the full HeAP strategy of alternating per-type runs with an all-type run.
+ for (auto &c : bucket_count) {
+ if (c.second >= 0.98 * int(place_cells.size())) {
+ heap_runs.clear();
+ break;
+ }
+ }
+
+ if (cfg.placeAllAtOnce) {
+ // Never want to deal with LUTs, FFs, MUXFxs separately,
+ // for now disable all single-cell-type runs and only have heterogeneous
+ // runs
+ heap_runs.clear();
+ }
+
+ heap_runs.push_back(all_buckets);
+ // The main HeAP placer loop
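+        // Iterate while fewer than 5 consecutive iterations have failed to improve the
+        // best legalised wirelength and the lower-bound (solved) wirelength is still no
+        // more than 80% of the legalised wirelength.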
+ log_info("Running main analytical placer.\n");
+ while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) {
+ // Alternate between particular bel types and all bels
+ for (auto &run : heap_runs) {
+ auto run_startt = std::chrono::high_resolution_clock::now();
+
+ setup_solve_cells(&run);
+ if (solve_cells.empty())
+ continue;
+ // Heuristic: don't bother with threading below a certain size
+ auto solve_startt = std::chrono::high_resolution_clock::now();
+
+ // Build the connectivity matrix and run the solver; multithreaded between x and y axes if applicable
+#ifndef NPNR_DISABLE_THREADS
+ if (solve_cells.size() >= 500) {
+ boost::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); });
+ build_solve_direction(true, (iter == 0) ? -1 : iter);
+ xaxis.join();
+ } else
+#endif
+ {
+ build_solve_direction(false, (iter == 0) ? -1 : iter);
+ build_solve_direction(true, (iter == 0) ? -1 : iter);
+ }
+ auto solve_endt = std::chrono::high_resolution_clock::now();
+ solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count();
+ update_all_chains();
+ solved_hpwl = total_hpwl();
+
+ update_all_chains();
+
+ // Run the spreader
+ for (const auto &group : cfg.cellGroups)
+ CutSpreader(this, group).run();
+
+ for (auto type : run)
+ if (std::all_of(cfg.cellGroups.begin(), cfg.cellGroups.end(),
+ [type](const pool<BelBucketId> &grp) { return !grp.count(type); }))
+ CutSpreader(this, {type}).run();
+
+ // Run strict legalisation to find a valid bel for all cells
+ update_all_chains();
+ spread_hpwl = total_hpwl();
+ legalise_placement_strict(true);
+ update_all_chains();
+
+ legal_hpwl = total_hpwl();
+ auto run_stopt = std::chrono::high_resolution_clock::now();
+
+ IdString bucket_name = ctx->getBelBucketName(*run.begin());
+ log_info(" at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n",
+ iter + 1, (run.size() > 1 ? "ALL" : bucket_name.c_str(ctx)), int(solved_hpwl),
+ int(spread_hpwl), int(legal_hpwl),
+ std::chrono::duration<double>(run_stopt - run_startt).count());
+ }
+
+ // Update timing weights
+ if (cfg.timing_driven)
+ tmg.run();
+
+ if (legal_hpwl < best_hpwl) {
+ best_hpwl = legal_hpwl;
+ stalled = 0;
+ // Save solution
+ solution.clear();
+ for (auto &cell : ctx->cells) {
+ solution.emplace_back(cell.second.get(), cell.second->bel, cell.second->belStrength);
+ }
+ } else {
+ ++stalled;
+ }
+ for (auto &cl : cell_locs) {
+ cl.second.legal_x = cl.second.x;
+ cl.second.legal_y = cl.second.y;
+ }
+ ctx->yield();
+ ++iter;
+ }
+
+ // Apply saved solution
+ for (auto &sc : solution) {
+ CellInfo *cell = std::get<0>(sc);
+ if (cell->bel != BelId())
+ ctx->unbindBel(cell->bel);
+ }
+ for (auto &sc : solution) {
+ CellInfo *cell;
+ BelId bel;
+ PlaceStrength strength;
+ std::tie(cell, bel, strength) = sc;
+ ctx->bindBel(bel, cell, strength);
+ }
+
+ for (auto &cell : ctx->cells) {
+ if (cell.second->bel == BelId())
+ log_error("Found unbound cell %s\n", cell.first.c_str(ctx));
+ if (ctx->getBoundBelCell(cell.second->bel) != cell.second.get())
+ log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx));
+ if (ctx->debug)
+ log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->nameOfBel(cell.second->bel));
+ }
+
+ bool any_bad_placements = false;
+ for (auto bel : ctx->getBels()) {
+ CellInfo *cell = ctx->getBoundBelCell(bel);
+ if (!ctx->isBelLocationValid(bel)) {
+ std::string cell_text = "no cell";
+ if (cell != nullptr)
+ cell_text = std::string("cell '") + ctx->nameOf(cell) + "'";
+ log_warning("post-placement validity check failed for Bel '%s' "
+ "(%s)\n",
+ ctx->nameOfBel(bel), cell_text.c_str());
+ any_bad_placements = true;
+ }
+ }
+
+ if (any_bad_placements) {
+ return false;
+ }
+
+ auto endtt = std::chrono::high_resolution_clock::now();
+ log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration<double>(endtt - startt).count());
+ log_info(" of which solving equations: %.02fs\n", solve_time);
+ log_info(" of which spreading cells: %.02fs\n", cl_time);
+ log_info(" of which strict legalisation: %.02fs\n", sl_time);
+
+ ctx->check();
+ lock.unlock_early();
+
+#if !defined(__wasm)
+ if (cfg.parallelRefine) {
+ if (!parallel_refine(ctx, ParallelRefineCfg(ctx))) {
+ return false;
+ }
+ } else
+#endif
+ {
+ if (!placer1_refine(ctx, Placer1Cfg(ctx))) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private:
+ Context *ctx;
+ PlacerHeapCfg cfg;
+
+ int max_x = 0, max_y = 0;
+ FastBels fast_bels;
+ dict<IdString, std::tuple<int, int>> bel_types;
+
+ TimingAnalyser tmg;
+
+ struct BoundingBox
+ {
+ // Actual bounding box
+ int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
+ };
+
+ dict<IdString, BoundingBox> constraint_region_bounds;
+
+ // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom
+ // structure instead
+ struct CellLocation
+ {
+ int x, y;
+ int legal_x, legal_y;
+ double rawx, rawy;
+ bool locked, global;
+ };
+ dict<IdString, CellLocation> cell_locs;
+    // The set of cells that we will actually place. This excludes locked cells and child cells of macros/chains
+    // (only the root of each macro is placed).
+ std::vector<CellInfo *> place_cells;
+
+ // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place
+ // cells of a certain type)
+ std::vector<CellInfo *> solve_cells;
+
+ dict<ClusterId, std::vector<CellInfo *>> cluster2cells;
+ dict<ClusterId, int> chain_size;
+ // Performance counting
+ double solve_time = 0, cl_time = 0, sl_time = 0;
+
+ // Place cells with the BEL attribute set to constrain them
+ void place_constraints()
+ {
+ size_t placed_cells = 0;
+ // Initial constraints placer
+ for (auto &cell_entry : ctx->cells) {
+ CellInfo *cell = cell_entry.second.get();
+
+ auto loc = cell->attrs.find(ctx->id("BEL"));
+ if (loc != cell->attrs.end()) {
+ std::string loc_name = loc->second.as_string();
+ BelId bel = ctx->getBelByNameStr(loc_name);
+ if (bel == BelId()) {
+ log_error("No Bel named \'%s\' located for "
+ "this chip (processing BEL attribute on \'%s\')\n",
+ loc_name.c_str(), cell->name.c_str(ctx));
+ }
+
+ if (!ctx->isValidBelForCellType(cell->type, bel)) {
+ IdString bel_type = ctx->getBelType(bel);
+ log_error("Bel \'%s\' of type \'%s\' does not match cell "
+ "\'%s\' of type \'%s\'\n",
+ loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ auto bound_cell = ctx->getBoundBelCell(bel);
+ if (bound_cell) {
+ log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n",
+ cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx));
+ }
+
+ ctx->bindBel(bel, cell, STRENGTH_USER);
+ if (!ctx->isBelLocationValid(bel)) {
+ IdString bel_type = ctx->getBelType(bel);
+ log_error("Bel \'%s\' of type \'%s\' is not valid for cell "
+ "\'%s\' of type \'%s\'\n",
+ loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+ }
+ placed_cells++;
+ }
+ }
+ log_info("Placed %d cells based on constraints.\n", int(placed_cells));
+ ctx->yield();
+ }
+
+ void build_fast_bels()
+ {
+ for (auto bel : ctx->getBels()) {
+ if (!ctx->checkBelAvail(bel))
+ continue;
+ Loc loc = ctx->getBelLocation(bel);
+ max_x = std::max(max_x, loc.x);
+ max_y = std::max(max_y, loc.y);
+ }
+
+ pool<IdString> cell_types_in_use;
+ pool<BelBucketId> buckets_in_use;
+ for (auto &cell : ctx->cells) {
+ IdString cell_type = cell.second->type;
+ cell_types_in_use.insert(cell_type);
+ BelBucketId bucket = ctx->getBelBucketForCellType(cell_type);
+ buckets_in_use.insert(bucket);
+ }
+
+ for (auto cell_type : cell_types_in_use) {
+ fast_bels.addCellType(cell_type);
+ }
+ for (auto bucket : buckets_in_use) {
+ fast_bels.addBelBucket(bucket);
+ }
+
+ // Determine bounding boxes of region constraints
+ for (auto &region : ctx->region) {
+ Region *r = region.second.get();
+ BoundingBox bb;
+ if (r->constr_bels) {
+ bb.x0 = std::numeric_limits<int>::max();
+ bb.x1 = std::numeric_limits<int>::min();
+ bb.y0 = std::numeric_limits<int>::max();
+ bb.y1 = std::numeric_limits<int>::min();
+ for (auto bel : r->bels) {
+ Loc loc = ctx->getBelLocation(bel);
+ bb.x0 = std::min(bb.x0, loc.x);
+ bb.x1 = std::max(bb.x1, loc.x);
+ bb.y0 = std::min(bb.y0, loc.y);
+ bb.y1 = std::max(bb.y1, loc.y);
+ }
+ } else {
+ bb.x0 = 0;
+ bb.y0 = 0;
+ bb.x1 = max_x;
+ bb.y1 = max_y;
+ }
+ constraint_region_bounds[r->name] = bb;
+ }
+ }
+
+ // Build and solve in one direction
+ void build_solve_direction(bool yaxis, int iter)
+ {
+ for (int i = 0; i < 5; i++) {
+ EquationSystem<double> esx(solve_cells.size(), solve_cells.size());
+ build_equations(esx, yaxis, iter);
+ solve_equations(esx, yaxis);
+ }
+ }
+
+ // Check if a cell has any meaningful connectivity
+ bool has_connectivity(CellInfo *cell)
+ {
+ for (auto port : cell->ports) {
+ if (port.second.net != nullptr && port.second.net->driver.cell != nullptr &&
+ !port.second.net->users.empty())
+ return true;
+ }
+ return false;
+ }
+
+ // Build up a random initial placement, without regard to legality
+ // FIXME: Are there better approaches to the initial placement (e.g. greedy?)
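+ // Cells already bound to a bel are treated as locked; every other macro/chain root is given a random,
+ // still-unused bel compatible with its type. Cells with no meaningful connectivity, and ioBufTypes
+ // cells, are bound and locked straight away (this helps avoid a singular matrix, see ioBufTypes).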
+ void seed_placement()
+ {
+ pool<IdString> cell_types;
+ for (const auto &cell : ctx->cells) {
+ cell_types.insert(cell.second->type);
+ }
+
+ pool<BelId> bels_used;
+ dict<IdString, std::deque<BelId>> available_bels;
+
+ for (auto bel : ctx->getBels()) {
+ if (!ctx->checkBelAvail(bel)) {
+ continue;
+ }
+
+ for (auto cell_type : cell_types) {
+ if (ctx->isValidBelForCellType(cell_type, bel)) {
+ available_bels[cell_type].push_back(bel);
+ }
+ }
+ }
+
+ for (auto &t : available_bels) {
+ ctx->shuffle(t.second.begin(), t.second.end());
+ }
+
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (ci->bel != BelId()) {
+ Loc loc = ctx->getBelLocation(ci->bel);
+ cell_locs[cell.first].x = loc.x;
+ cell_locs[cell.first].y = loc.y;
+ cell_locs[cell.first].locked = true;
+ cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel);
+ } else if (ci->cluster == ClusterId() || ctx->getClusterRootCell(ci->cluster) == ci) {
+ bool placed = false;
+ int attempt_count = 0;
+ while (!placed) {
+ ++attempt_count;
+ if (attempt_count > 25000) {
+ log_error("Unable to find a placement location for cell '%s'\n", ci->name.c_str(ctx));
+ }
+
+ // Make sure this cell type is in the available BEL map at
+ // all.
+ if (!available_bels.count(ci->type)) {
+ log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n",
+ ci->name.c_str(ctx), ci->type.c_str(ctx));
+ }
+
+ // Find an unused BEL from bels_for_cell_type.
+ auto &bels_for_cell_type = available_bels.at(ci->type);
+ BelId bel;
+ while (true) {
+ if (bels_for_cell_type.empty()) {
+ log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n",
+ ci->name.c_str(ctx), ci->type.c_str(ctx));
+ }
+
+ BelId candidate_bel = bels_for_cell_type.back();
+ bels_for_cell_type.pop_back();
+ if (bels_used.count(candidate_bel)) {
+ // candidate_bel has already been used by another
+ // cell type, skip it.
+ continue;
+ }
+
+ bel = candidate_bel;
+ break;
+ }
+
+ Loc loc = ctx->getBelLocation(bel);
+ cell_locs[cell.first].x = loc.x;
+ cell_locs[cell.first].y = loc.y;
+ cell_locs[cell.first].locked = false;
+ cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel);
+
+ // FIXME
+ if (has_connectivity(cell.second.get()) && !cfg.ioBufTypes.count(ci->type)) {
+ bels_used.insert(bel);
+ place_cells.push_back(ci);
+ placed = true;
+ } else {
+ ctx->bindBel(bel, ci, STRENGTH_STRONG);
+ if (ctx->isBelLocationValid(bel)) {
+ cell_locs[cell.first].locked = true;
+ placed = true;
+ bels_used.insert(bel);
+ } else {
+ ctx->unbindBel(bel);
+ available_bels.at(ci->type).push_front(bel);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Setup the cells to be solved, returns the number of rows
+ int setup_solve_cells(pool<BelBucketId> *buckets = nullptr)
+ {
+ int row = 0;
+ solve_cells.clear();
+ // First clear the udata of all cells
+ for (auto &cell : ctx->cells)
+ cell.second->udata = dont_solve;
+ // Then update cells to be placed, which excludes cell children
+ for (auto cell : place_cells) {
+ if (buckets && !buckets->count(ctx->getBelBucketForCellType(cell->type)))
+ continue;
+ cell->udata = row++;
+ solve_cells.push_back(cell);
+ }
+ // Finally, update the udata of children
+ for (auto &cluster : cluster2cells)
+ for (auto child : cluster.second)
+ child->udata = ctx->getClusterRootCell(cluster.first)->udata;
+ return row;
+ }
+
+ // Update all chains
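+ // (each cluster child takes its root's position plus the cluster offset, clamped to the grid, and
+ // chain_size counts the same-type members of a cluster so legalisation can prioritise large macros)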
+ void update_all_chains()
+ {
+ for (auto cell : place_cells) {
+ chain_size[cell->name] = 1;
+ if (cell->cluster != ClusterId()) {
+ const auto base = cell_locs[cell->name];
+ for (auto child : cluster2cells.at(cell->cluster)) {
+ if (child->type == cell->type && child != cell)
+ chain_size[cell->name]++;
+ Loc offset = ctx->getClusterOffset(child);
+ cell_locs[child->name].x = std::max(0, std::min(max_x, base.x + offset.x));
+ cell_locs[child->name].y = std::max(0, std::min(max_y, base.y + offset.y));
+ }
+ }
+ }
+ }
+
+ // Run a function on all ports of a net - including the driver and all users
+ template <typename Tf> void foreach_port(NetInfo *net, Tf func)
+ {
+ if (net->driver.cell != nullptr)
+ func(net->driver, store_index<PortRef>());
+ for (auto usr : net->users.enumerate())
+ func(usr.value, usr.index);
+ }
+
+ // Build the system of equations for either X or Y
+ void build_equations(EquationSystem<double> &es, bool yaxis, int iter = -1)
+ {
+ // Return the x or y position of a cell, depending on yaxis
+ auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
+ auto legal_pos = [&](CellInfo *cell) {
+ return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x;
+ };
+
+ es.reset();
+
+ for (auto &net : ctx->nets) {
+ NetInfo *ni = net.second.get();
+ if (ni->driver.cell == nullptr)
+ continue;
+ if (ni->users.empty())
+ continue;
+ if (cell_locs.at(ni->driver.cell->name).global)
+ continue;
+ // Find the bounds of the net in this axis, and the ports that correspond to these bounds
+ PortRef *lbport = nullptr, *ubport = nullptr;
+ int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min();
+ foreach_port(ni, [&](PortRef &port, store_index<PortRef> user_idx) {
+ int pos = cell_pos(port.cell);
+ if (pos < lbpos) {
+ lbpos = pos;
+ lbport = &port;
+ }
+ if (pos > ubpos) {
+ ubpos = pos;
+ ubport = &port;
+ }
+ });
+ NPNR_ASSERT(lbport != nullptr);
+ NPNR_ASSERT(ubport != nullptr);
+
+ auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) {
+ if (eqn.cell->udata == dont_solve)
+ return;
+ int row = eqn.cell->udata;
+ int v_pos = cell_pos(var.cell);
+ if (var.cell->udata != dont_solve) {
+ es.add_coeff(row, var.cell->udata, weight);
+ } else {
+ es.add_rhs(row, -v_pos * weight);
+ }
+ if (var.cell->cluster != ClusterId()) {
+ Loc offset = ctx->getClusterOffset(var.cell);
+ es.add_rhs(row, -(yaxis ? offset.y : offset.x) * weight);
+ }
+ };
+
+ // Add all relevant connections to the matrix
+ foreach_port(ni, [&](PortRef &port, store_index<PortRef> user_idx) {
+ int this_pos = cell_pos(port.cell);
+ auto process_arc = [&](PortRef *other) {
+ if (other == &port)
+ return;
+ int o_pos = cell_pos(other->cell);
+ double weight = 1.0 / (ni->users.entries() *
+ std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) *
+ std::abs(o_pos - this_pos)));
+
+ if (user_idx) {
+ weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)),
+ cfg.criticalityExponent));
+ }
+
+ // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
+ // if the other end isn't fixed
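+ // For illustration (writing A/rhs for es.add_coeff/es.add_rhs and ignoring cluster offsets): a
+ // connection of weight w between movable cells a and b stamps
+ //   A[a][a] += w; A[a][b] -= w; A[b][b] += w; A[b][a] -= w;
+ // whereas if b is fixed at position p_b, only a's row is touched:
+ //   A[a][a] += w; rhs[a] += w * p_b;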
+ stamp_equation(port, port, weight);
+ stamp_equation(port, *other, -weight);
+ stamp_equation(*other, *other, weight);
+ stamp_equation(*other, port, -weight);
+ };
+ process_arc(lbport);
+ process_arc(ubport);
+ });
+ }
+ if (iter != -1) {
+ float alpha = cfg.alpha;
+ for (size_t row = 0; row < solve_cells.size(); row++) {
+ int l_pos = legal_pos(solve_cells.at(row));
+ int c_pos = cell_pos(solve_cells.at(row));
+
+ double weight =
+ alpha * iter /
+ std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(l_pos - c_pos));
+ // Add an arc from legalised to current position
+ es.add_coeff(row, row, weight);
+ es.add_rhs(row, weight * l_pos);
+ }
+ }
+ }
+
+ // Solve the system of equations for either X or Y
+ void solve_equations(EquationSystem<double> &es, bool yaxis)
+ {
+ // Return the x or y position of a cell, depending on yaxis
+ auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
+ std::vector<double> vals;
+ std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos);
+ es.solve(vals, cfg.solverTolerance);
+ for (size_t i = 0; i < vals.size(); i++)
+ if (yaxis) {
+ cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i);
+ cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i))));
+ if (solve_cells.at(i)->region != nullptr)
+ cell_locs.at(solve_cells.at(i)->name).y =
+ limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).y, true);
+ } else {
+ cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i);
+ cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i))));
+ if (solve_cells.at(i)->region != nullptr)
+ cell_locs.at(solve_cells.at(i)->name).x =
+ limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).x, false);
+ }
+ }
+
+ // Compute HPWL
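+ // e.g. a net with pins at (2,3), (5,1) and (4,7) contributes hpwl_scale_x * (5 - 2) + hpwl_scale_y * (7 - 1)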
+ wirelen_t total_hpwl()
+ {
+ wirelen_t hpwl = 0;
+ for (auto &net : ctx->nets) {
+ NetInfo *ni = net.second.get();
+ if (ni->driver.cell == nullptr)
+ continue;
+ CellLocation &drvloc = cell_locs.at(ni->driver.cell->name);
+ if (drvloc.global)
+ continue;
+ int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y;
+ for (auto &user : ni->users) {
+ CellLocation &usrloc = cell_locs.at(user.cell->name);
+ xmin = std::min(xmin, usrloc.x);
+ xmax = std::max(xmax, usrloc.x);
+ ymin = std::min(ymin, usrloc.y);
+ ymax = std::max(ymax, usrloc.y);
+ }
+ hpwl += cfg.hpwl_scale_x * (xmax - xmin) + cfg.hpwl_scale_y * (ymax - ymin);
+ }
+ return hpwl;
+ }
+
+ // Strict placement legalisation, performed after the initial HeAP spreading
+ void legalise_placement_strict(bool require_validity = false)
+ {
+ auto startt = std::chrono::high_resolution_clock::now();
+
+ // Unbind all cells placed in this solution
+ for (auto &cell : ctx->cells) {
+ CellInfo *ci = cell.second.get();
+ if (ci->bel != BelId() &&
+ (ci->udata != dont_solve ||
+ (ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve)))
+ ctx->unbindBel(ci->bel);
+ }
+
+ // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using
+ // the simple greedy largest-macro-first approach.
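+ // Cells are popped from a max-heap keyed on chain size, so the largest macros get first pick of
+ // locations; any cells ripped up along the way are re-queued with their own chain size.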
+ std::priority_queue<std::pair<int, IdString>> remaining;
+ for (auto cell : solve_cells) {
+ remaining.emplace(chain_size[cell->name], cell->name);
+ }
+ int ripup_radius = 2;
+ int total_iters = 0;
+ int total_iters_noreset = 0;
+ while (!remaining.empty()) {
+ auto top = remaining.top();
+ remaining.pop();
+
+ CellInfo *ci = ctx->cells.at(top.second).get();
+ // Already placed (e.g. a stale queue entry after being ripped up and re-placed), ignore
+ if (ci->bel != BelId())
+ continue;
+ // log_info(" Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx));
+ FastBels::FastBelsData *fb;
+ fast_bels.getBelsForCellType(ci->type, &fb);
+ int radius = 0;
+ int iter = 0;
+ int iter_at_radius = 0;
+ bool placed = false;
+ BelId bestBel;
+ int best_inp_len = std::numeric_limits<int>::max();
+
+ total_iters++;
+ total_iters_noreset++;
+ if (total_iters > int(solve_cells.size())) {
+ total_iters = 0;
+ ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2);
+ }
+
+ if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) {
+ log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n");
+ }
+
+ while (!placed) {
+
+ // Set a conservative timeout
+ if (iter > std::max(10000, 3 * int(ctx->cells.size())))
+ log_error("Unable to find legal placement for cell '%s', check constraints and utilisation.\n",
+ ctx->nameOf(ci));
+
+ // Determine a search radius around the solver location (which increases over time) that is clamped to
+ // the region constraint for the cell (if applicable)
+ int rx = radius, ry = radius;
+
+ if (ci->region != nullptr) {
+ rx = std::min(radius, (constraint_region_bounds[ci->region->name].x1 -
+ constraint_region_bounds[ci->region->name].x0) /
+ 2 +
+ 1);
+ ry = std::min(radius, (constraint_region_bounds[ci->region->name].y1 -
+ constraint_region_bounds[ci->region->name].y0) /
+ 2 +
+ 1);
+ }
+
+ // Pick a random X and Y location within our search radius
+ int nx = ctx->rng(2 * rx + 1) + std::max(cell_locs.at(ci->name).x - rx, 0);
+ int ny = ctx->rng(2 * ry + 1) + std::max(cell_locs.at(ci->name).y - ry, 0);
+
+ iter++;
+ iter_at_radius++;
+ if (iter >= (10 * (radius + 1))) {
+ // No luck yet, increase radius
+ radius = std::min(std::max(max_x, max_y), radius + 1);
+ while (radius < std::max(max_x, max_y)) {
+ // Keep increasing the radius until it will actually increase the number of cells we are
+ // checking (e.g. BRAM and DSP will not be in all cols/rows), so we don't waste effort
+ for (int x = std::max(0, cell_locs.at(ci->name).x - radius);
+ x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) {
+ if (x >= int(fb->size()))
+ break;
+ for (int y = std::max(0, cell_locs.at(ci->name).y - radius);
+ y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) {
+ if (y >= int(fb->at(x).size()))
+ break;
+ if (fb->at(x).at(y).size() > 0)
+ goto notempty;
+ }
+ }
+ radius = std::min(std::max(max_x, max_y), radius + 1);
+ }
+ notempty:
+ iter_at_radius = 0;
+ iter = 0;
+ }
+ // If our randomly chosen coordinate is out of bounds, or points to a tile with no relevant bels,
+ // ignore it
+ if (nx < 0 || nx > max_x)
+ continue;
+ if (ny < 0 || ny > max_y)
+ continue;
+
+ if (nx >= int(fb->size()))
+ continue;
+ if (ny >= int(fb->at(nx).size()))
+ continue;
+ if (fb->at(nx).at(ny).empty())
+ continue;
+
+ // The number of attempts to find a location to try
+ int need_to_explore = 2 * radius;
+
+ // If we have found at least one legal location; and made enough attempts; assume it's good enough and
+ // finish
+ if (iter_at_radius >= need_to_explore && bestBel != BelId()) {
+ CellInfo *bound = ctx->getBoundBelCell(bestBel);
+ if (bound != nullptr) {
+ ctx->unbindBel(bound->bel);
+ remaining.emplace(chain_size[bound->name], bound->name);
+ }
+ ctx->bindBel(bestBel, ci, STRENGTH_WEAK);
+ placed = true;
+ Loc loc = ctx->getBelLocation(bestBel);
+ cell_locs[ci->name].x = loc.x;
+ cell_locs[ci->name].y = loc.y;
+ break;
+ }
+
+ if (ci->cluster == ClusterId()) {
+ // The case where we have no relative constraints
+ for (auto sz : fb->at(nx).at(ny)) {
+ // Look through all bels in this tile; checking region constraint if applicable
+ if (!ci->testRegion(sz))
+ continue;
+ // Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets)
+ // or occasionally trigger a tiebreaker
+ if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) {
+ CellInfo *bound = ctx->getBoundBelCell(sz);
+ if (bound != nullptr) {
+ // Only rip up cells without constraints
+ if (bound->cluster != ClusterId())
+ continue;
+ ctx->unbindBel(bound->bel);
+ }
+ // Provisionally bind the bel
+ ctx->bindBel(sz, ci, STRENGTH_WEAK);
+ if (require_validity && !ctx->isBelLocationValid(sz)) {
+ // New location is not legal; unbind the cell (and rebind the cell we ripped up if
+ // applicable)
+ ctx->unbindBel(sz);
+ if (bound != nullptr)
+ ctx->bindBel(sz, bound, STRENGTH_WEAK);
+ } else if (iter_at_radius < need_to_explore) {
+ // It's legal, but we haven't tried enough locations yet
+ ctx->unbindBel(sz);
+ if (bound != nullptr)
+ ctx->bindBel(sz, bound, STRENGTH_WEAK);
+ int input_len = 0;
+ // Compute a fast input wirelength metric at this bel; and save if better than our last
+ // try
+ for (auto &port : ci->ports) {
+ auto &p = port.second;
+ if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr)
+ continue;
+ CellInfo *drv = p.net->driver.cell;
+ auto drv_loc = cell_locs.find(drv->name);
+ if (drv_loc == cell_locs.end())
+ continue;
+ if (drv_loc->second.global)
+ continue;
+ input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny);
+ }
+ if (input_len < best_inp_len) {
+ best_inp_len = input_len;
+ bestBel = sz;
+ }
+ break;
+ } else {
+ // It's legal, and we've tried enough. Finish.
+ if (bound != nullptr)
+ remaining.emplace(chain_size[bound->name], bound->name);
+ Loc loc = ctx->getBelLocation(sz);
+ cell_locs[ci->name].x = loc.x;
+ cell_locs[ci->name].y = loc.y;
+ placed = true;
+ break;
+ }
+ }
+ }
+ } else {
+ // We do have relative constraints
+ for (auto sz : fb->at(nx).at(ny)) {
+ // List of cells and their destination
+ std::vector<std::pair<CellInfo *, BelId>> targets;
+ // List of bels we placed things at; and the cell that was there before if applicable
+ std::vector<std::pair<BelId, CellInfo *>> swaps_made;
+
+ if (!ctx->getClusterPlacement(ci->cluster, sz, targets))
+ continue;
+
+ for (auto &target : targets) {
+ // Check it satisfies the region constraint if applicable
+ if (!target.first->testRegion(target.second))
+ goto fail;
+ CellInfo *bound = ctx->getBoundBelCell(target.second);
+ // Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain
+ if (bound != nullptr)
+ if (bound->cluster != ClusterId() || bound->belStrength > STRENGTH_WEAK)
+ goto fail;
+ }
+ // Actually perform the move; keeping track of the moves we make so we can revert them if needed
+ for (auto &target : targets) {
+ CellInfo *bound = ctx->getBoundBelCell(target.second);
+ if (bound != nullptr)
+ ctx->unbindBel(target.second);
+ ctx->bindBel(target.second, target.first, STRENGTH_STRONG);
+ swaps_made.emplace_back(target.second, bound);
+ }
+ // Check that the move we have made is legal
+ for (auto &sm : swaps_made) {
+ if (!ctx->isBelLocationValid(sm.first))
+ goto fail;
+ }
+
+ if (false) {
+ fail:
+ // If the move turned out to be illegal; revert all the moves we made
+ for (auto &swap : swaps_made) {
+ ctx->unbindBel(swap.first);
+ if (swap.second != nullptr)
+ ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK);
+ }
+ continue;
+ }
+ for (auto &target : targets) {
+ Loc loc = ctx->getBelLocation(target.second);
+ cell_locs[target.first->name].x = loc.x;
+ cell_locs[target.first->name].y = loc.y;
+ // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z);
+ }
+ for (auto &swap : swaps_made) {
+ // Where we have ripped up cells; add them to the queue
+ if (swap.second != nullptr)
+ remaining.emplace(chain_size[swap.second->name], swap.second->name);
+ }
+
+ placed = true;
+ break;
+ }
+ }
+ }
+ }
+ auto endt = std::chrono::high_resolution_clock::now();
+ sl_time += std::chrono::duration<float>(endt - startt).count();
+ }
+ // Implementation of the cut-based spreading as described in the HeAP/SimPL papers
+
+ template <typename T> T limit_to_reg(Region *reg, T val, bool dir)
+ {
+ if (reg == nullptr)
+ return val;
+ int limit_low = dir ? constraint_region_bounds[reg->name].y0 : constraint_region_bounds[reg->name].x0;
+ int limit_high = dir ? constraint_region_bounds[reg->name].y1 : constraint_region_bounds[reg->name].x1;
+ return std::max<T>(std::min<T>(val, limit_high), limit_low);
+ }
+
+ struct ChainExtent
+ {
+ int x0, y0, x1, y1;
+ };
+
+ struct SpreaderRegion
+ {
+ int id;
+ int x0, y0, x1, y1;
+ std::vector<int> cells, bels;
+ bool overused(float beta) const
+ {
+ for (size_t t = 0; t < cells.size(); t++) {
+ if (bels.at(t) < 4) {
+ if (cells.at(t) > bels.at(t))
+ return true;
+ } else {
+ if (cells.at(t) > beta * bels.at(t))
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+
+ class CutSpreader
+ {
+ public:
+ CutSpreader(HeAPPlacer *p, const pool<BelBucketId> &buckets) : p(p), ctx(p->ctx), buckets(buckets)
+ {
+ // Get fast BELs data for all buckets being Cut/Spread.
+ size_t idx = 0;
+ for (BelBucketId bucket : buckets) {
+ type_index[bucket] = idx;
+ FastBels::FastBelsData *fast_bels;
+ p->fast_bels.getBelsForBelBucket(bucket, &fast_bels);
+ fb.push_back(fast_bels);
+ ++idx;
+ NPNR_ASSERT(fb.size() == idx);
+ }
+ }
+ static int seq;
+ void run()
+ {
+ auto startt = std::chrono::high_resolution_clock::now();
+ init();
+ find_overused_regions();
+ for (auto &r : regions) {
+ if (merged_regions.count(r.id))
+ continue;
+#if 0
+ log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells,
+ r.bels);
+#endif
+ }
+ expand_regions();
+ std::queue<std::pair<int, bool>> workqueue;
+#if 0
+ std::vector<std::pair<double, double>> orig;
+ if (ctx->debug)
+ for (auto c : p->solve_cells)
+ orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy);
+#endif
+ for (auto &r : regions) {
+ if (merged_regions.count(r.id))
+ continue;
+#if 0
+ for (auto t : sorted(beltype)) {
+ log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", t.c_str(ctx), r.x0, r.y0, r.x1, r.y1,
+ r.cells.at(type_index.at(t)), r.bels.at(type_index.at(t)));
+ }
+
+#endif
+ workqueue.emplace(r.id, false);
+ }
+ while (!workqueue.empty()) {
+ auto front = workqueue.front();
+ workqueue.pop();
+ auto &r = regions.at(front.first);
+ if (std::all_of(r.cells.begin(), r.cells.end(), [](int x) { return x == 0; }))
+ continue;
+ auto res = cut_region(r, front.second);
+ if (res) {
+ workqueue.emplace(res->first, !front.second);
+ workqueue.emplace(res->second, !front.second);
+ } else {
+ // Try the other dir, in case stuck in one direction only
+ auto res2 = cut_region(r, !front.second);
+ if (res2) {
+ workqueue.emplace(res2->first, front.second);
+ workqueue.emplace(res2->second, front.second);
+ }
+ }
+ }
+#if 0
+ if (ctx->debug) {
+ std::ofstream sp("spread" + std::to_string(seq) + ".csv");
+ for (size_t i = 0; i < p->solve_cells.size(); i++) {
+ auto &c = p->solve_cells.at(i);
+ if (c->type != beltype)
+ continue;
+ sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl;
+ }
+ std::ofstream oc("cells" + std::to_string(seq) + ".csv");
+ for (size_t y = 0; y <= p->max_y; y++) {
+ for (size_t x = 0; x <= p->max_x; x++) {
+ oc << cells_at_location.at(x).at(y).size() << ", ";
+ }
+ oc << std::endl;
+ }
+ ++seq;
+ }
+#endif
+ auto endt = std::chrono::high_resolution_clock::now();
+ p->cl_time += std::chrono::duration<float>(endt - startt).count();
+ }
+
+ private:
+ HeAPPlacer *p;
+ Context *ctx;
+ pool<BelBucketId> buckets;
+ dict<BelBucketId, size_t> type_index;
+ std::vector<std::vector<std::vector<int>>> occupancy;
+ std::vector<std::vector<int>> groups;
+ std::vector<std::vector<ChainExtent>> chaines;
+ std::map<IdString, ChainExtent> cell_extents;
+
+ std::vector<std::vector<std::vector<std::vector<BelId>>> *> fb;
+
+ std::vector<SpreaderRegion> regions;
+ pool<int> merged_regions;
+ // Cells at a location, sorted by real (not integer) x and y
+ std::vector<std::vector<std::vector<CellInfo *>>> cells_at_location;
+
+ int occ_at(int x, int y, int type) { return occupancy.at(x).at(y).at(type); }
+
+ int bels_at(int x, int y, int type)
+ {
+ if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size()))
+ return 0;
+ return int(fb.at(type)->at(x).at(y).size());
+ }
+
+ bool is_cell_fixed(const CellInfo &cell) const
+ {
+ return buckets.count(ctx->getBelBucketForCellType(cell.type)) == 0;
+ }
+
+ size_t cell_index(const CellInfo &cell) const { return type_index.at(ctx->getBelBucketForCellType(cell.type)); }
+
+ void init()
+ {
+ occupancy.resize(p->max_x + 1,
+ std::vector<std::vector<int>>(p->max_y + 1, std::vector<int>(buckets.size(), 0)));
+ groups.resize(p->max_x + 1, std::vector<int>(p->max_y + 1, -1));
+ chaines.resize(p->max_x + 1, std::vector<ChainExtent>(p->max_y + 1));
+ cells_at_location.resize(p->max_x + 1, std::vector<std::vector<CellInfo *>>(p->max_y + 1));
+ for (int x = 0; x <= p->max_x; x++)
+ for (int y = 0; y <= p->max_y; y++) {
+ for (int t = 0; t < int(buckets.size()); t++) {
+ occupancy.at(x).at(y).at(t) = 0;
+ }
+ groups.at(x).at(y) = -1;
+ chaines.at(x).at(y) = {x, y, x, y};
+ }
+
+ auto set_chain_ext = [&](IdString cell, int x, int y) {
+ if (!cell_extents.count(cell))
+ cell_extents[cell] = {x, y, x, y};
+ else {
+ cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x);
+ cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y);
+ cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x);
+ cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y);
+ }
+ };
+
+ for (auto &cell_loc : p->cell_locs) {
+ IdString cell_name = cell_loc.first;
+ const CellInfo &cell = *ctx->cells.at(cell_name);
+ const CellLocation &loc = cell_loc.second;
+ if (is_cell_fixed(cell)) {
+ continue;
+ }
+
+ if (cell.belStrength > STRENGTH_STRONG) {
+ continue;
+ }
+
+ occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++;
+
+ // Compute ultimate extent of each chain root
+ if (cell.cluster != ClusterId()) {
+ set_chain_ext(ctx->getClusterRootCell(cell.cluster)->name, loc.x, loc.y);
+ }
+ }
+
+ for (auto &cell_loc : p->cell_locs) {
+ IdString cell_name = cell_loc.first;
+ const CellInfo &cell = *ctx->cells.at(cell_name);
+ const CellLocation &loc = cell_loc.second;
+ if (is_cell_fixed(cell)) {
+ continue;
+ }
+
+ if (cell.belStrength > STRENGTH_STRONG) {
+ continue;
+ }
+
+ // Transfer chain extents to the actual chains structure
+ ChainExtent *ce = nullptr;
+ if (cell.cluster != ClusterId()) {
+ ce = &(cell_extents.at(ctx->getClusterRootCell(cell.cluster)->name));
+ }
+
+ if (ce) {
+ auto &lce = chaines.at(loc.x).at(loc.y);
+ lce.x0 = std::min(lce.x0, ce->x0);
+ lce.y0 = std::min(lce.y0, ce->y0);
+ lce.x1 = std::max(lce.x1, ce->x1);
+ lce.y1 = std::max(lce.y1, ce->y1);
+ }
+ }
+
+ for (auto cell : p->solve_cells) {
+ if (is_cell_fixed(*cell)) {
+ continue;
+ }
+
+ cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell);
+ }
+ }
+
+ void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee)
+ {
+ // Prevent grow_region from recursing while doing this
+ for (int x = mergee.x0; x <= mergee.x1; x++) {
+ for (int y = mergee.y0; y <= mergee.y1; y++) {
+ // log_info("%d %d\n", groups.at(x).at(y), mergee.id);
+ NPNR_ASSERT(groups.at(x).at(y) == mergee.id);
+ groups.at(x).at(y) = merged.id;
+ for (size_t t = 0; t < buckets.size(); t++) {
+ merged.cells.at(t) += occ_at(x, y, t);
+ merged.bels.at(t) += bels_at(x, y, t);
+ }
+ }
+ }
+
+ merged_regions.insert(mergee.id);
+ grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1);
+ }
+
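+ // Expand region r so it covers at least (x0, y0) |_> (x1, y1), accumulating the bel/cell counts of
+ // newly covered tiles, merging in any overlapping regions and recursing to cover chains that extend
+ // beyond the new bounds.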
+ void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false)
+ {
+ // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1);
+ if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init)
+ return;
+ int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1;
+ r.x0 = std::min(r.x0, x0);
+ r.y0 = std::min(r.y0, y0);
+ r.x1 = std::max(r.x1, x1);
+ r.y1 = std::max(r.y1, y1);
+
+ auto process_location = [&](int x, int y) {
+ // Merge with any overlapping regions
+ if (groups.at(x).at(y) == -1) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ r.bels.at(t) += bels_at(x, y, t);
+ r.cells.at(t) += occ_at(x, y, t);
+ }
+ }
+
+ if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id)
+ merge_regions(r, regions.at(groups.at(x).at(y)));
+ groups.at(x).at(y) = r.id;
+ // Grow to cover any chains
+ auto &chaine = chaines.at(x).at(y);
+ grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1);
+ };
+ for (int x = r.x0; x < old_x0; x++)
+ for (int y = r.y0; y <= r.y1; y++)
+ process_location(x, y);
+ for (int x = old_x1 + 1; x <= x1; x++)
+ for (int y = r.y0; y <= r.y1; y++)
+ process_location(x, y);
+ for (int y = r.y0; y < old_y0; y++)
+ for (int x = r.x0; x <= r.x1; x++)
+ process_location(x, y);
+ for (int y = old_y1 + 1; y <= r.y1; y++)
+ for (int x = r.x0; x <= r.x1; x++)
+ process_location(x, y);
+ }
+
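+ // Seed a SpreaderRegion at every tile where, for some bucket, cell demand exceeds the available bels,
+ // then greedily expand it in x and y while the neighbouring row/column is still over-occupied.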
+ void find_overused_regions()
+ {
+ for (int x = 0; x <= p->max_x; x++)
+ for (int y = 0; y <= p->max_y; y++) {
+ // Either already in a group, or not overutilised. Ignore
+ if (groups.at(x).at(y) != -1)
+ continue;
+ bool overutilised = false;
+ for (size_t t = 0; t < buckets.size(); t++) {
+ if (occ_at(x, y, t) > bels_at(x, y, t)) {
+ overutilised = true;
+ break;
+ }
+ }
+
+ if (!overutilised)
+ continue;
+ // log_info("%d %d %d\n", x, y, occ_at(x, y));
+ int id = int(regions.size());
+ groups.at(x).at(y) = id;
+ SpreaderRegion reg;
+ reg.id = id;
+ reg.x0 = reg.x1 = x;
+ reg.y0 = reg.y1 = y;
+ for (size_t t = 0; t < buckets.size(); t++) {
+ reg.bels.push_back(bels_at(x, y, t));
+ reg.cells.push_back(occ_at(x, y, t));
+ }
+ // Make sure we cover carries, etc
+ grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true);
+
+ bool expanded = true;
+ while (expanded) {
+ expanded = false;
+ // Keep trying expansion in x and y, until we find no over-occupancy cells
+ // or hit grouped cells
+
+ // First try expanding in x
+ if (reg.x1 < p->max_x) {
+ bool over_occ_x = false;
+ for (int y1 = reg.y0; y1 <= reg.y1; y1++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ if (occ_at(reg.x1 + 1, y1, t) > bels_at(reg.x1 + 1, y1, t)) {
+ over_occ_x = true;
+ break;
+ }
+ }
+ }
+ if (over_occ_x) {
+ expanded = true;
+ grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1);
+ }
+ }
+
+ if (reg.y1 < p->max_y) {
+ bool over_occ_y = false;
+ for (int x1 = reg.x0; x1 <= reg.x1; x1++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ if (occ_at(x1, reg.y1 + 1, t) > bels_at(x1, reg.y1 + 1, t)) {
+ over_occ_y = true;
+ break;
+ }
+ }
+ }
+ if (over_occ_y) {
+ expanded = true;
+ grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1);
+ }
+ }
+ }
+ regions.push_back(reg);
+ }
+ }
+
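+ // Keep growing each overused region by up to spread_scale_x/y steps per axis until demand fits within
+ // (roughly) beta * available bels for every bucket, or the region cannot grow any further.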
+ void expand_regions()
+ {
+ std::queue<int> overu_regions;
+ float beta = p->cfg.beta;
+ for (auto &r : regions) {
+ if (!merged_regions.count(r.id) && r.overused(beta))
+ overu_regions.push(r.id);
+ }
+ while (!overu_regions.empty()) {
+ int rid = overu_regions.front();
+ overu_regions.pop();
+ if (merged_regions.count(rid))
+ continue;
+ auto &reg = regions.at(rid);
+ while (reg.overused(beta)) {
+ bool changed = false;
+ for (int j = 0; j < p->cfg.spread_scale_x; j++) {
+ if (reg.x0 > 0) {
+ grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1);
+ changed = true;
+ if (!reg.overused(beta))
+ break;
+ }
+ if (reg.x1 < p->max_x) {
+ grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1);
+ changed = true;
+ if (!reg.overused(beta))
+ break;
+ }
+ }
+ for (int j = 0; j < p->cfg.spread_scale_y; j++) {
+ if (reg.y0 > 0) {
+ grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1);
+ changed = true;
+ if (!reg.overused(beta))
+ break;
+ }
+ if (reg.y1 < p->max_y) {
+ grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1);
+ changed = true;
+ if (!reg.overused(beta))
+ break;
+ }
+ }
+ if (!changed) {
+ for (auto bucket : buckets) {
+ if (reg.cells.at(type_index.at(bucket)) > reg.bels.at(type_index.at(bucket))) {
+ IdString bucket_name = ctx->getBelBucketName(bucket);
+ log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0,
+ reg.x1, reg.y1, reg.cells.at(type_index.at(bucket)), bucket_name.c_str(ctx));
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // Implementation of the recursive cut-based spreading as described in the HeAP paper
+ // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir
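+ // Sketch of a single cut_region() pass: gather the region's cells sorted by raw position, pick a
+ // pivot near half the total cell area, scan for the row/column cut that minimises the utilisation
+ // imbalance while leaving clearance for the largest chain on each side, nudge the pivot to remove
+ // any remaining overutilisation, then bin-lerp each half's cells into its side of the cut.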
+
+ std::vector<CellInfo *> cut_cells;
+
+ boost::optional<std::pair<int, int>> cut_region(SpreaderRegion &r, bool dir)
+ {
+ cut_cells.clear();
+ auto &cal = cells_at_location;
+ int total_cells = 0, total_bels = 0;
+ for (int x = r.x0; x <= r.x1; x++) {
+ for (int y = r.y0; y <= r.y1; y++) {
+ std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells));
+ for (size_t t = 0; t < buckets.size(); t++)
+ total_bels += bels_at(x, y, t);
+ }
+ }
+ for (auto &cell : cut_cells) {
+ total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1;
+ }
+ std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) {
+ return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy)
+ : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx);
+ });
+
+ if (cut_cells.size() < 2)
+ return {};
+ // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut
+ int pivot_cells = 0;
+ int pivot = 0;
+ for (auto &cell : cut_cells) {
+ pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1;
+ if (pivot_cells >= total_cells / 2)
+ break;
+ pivot++;
+ }
+ if (pivot >= int(cut_cells.size())) {
+ pivot = int(cut_cells.size()) - 1;
+ }
+ // log_info("orig pivot %d/%d lc %d rc %d\n", pivot, int(cut_cells.size()), pivot_cells, total_cells -
+ // pivot_cells);
+
+ // Find the clearance required either side of the pivot
+ int clearance_l = 0, clearance_r = 0;
+ for (size_t i = 0; i < cut_cells.size(); i++) {
+ int size;
+ if (cell_extents.count(cut_cells.at(i)->name)) {
+ auto &ce = cell_extents.at(cut_cells.at(i)->name);
+ size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1);
+ } else {
+ size = 1;
+ }
+ if (int(i) < pivot)
+ clearance_l = std::max(clearance_l, size);
+ else
+ clearance_r = std::max(clearance_r, size);
+ }
+ // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains
+ // still fit
+
+ // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any
+ // bels of the appropriate type
+ int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1;
+ while (trimmed_l < (dir ? r.y1 : r.x1)) {
+ bool have_bels = false;
+ for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i, t) > 0) {
+ have_bels = true;
+ break;
+ }
+ }
+ }
+
+ if (have_bels)
+ break;
+
+ trimmed_l++;
+ }
+ while (trimmed_r > (dir ? r.y0 : r.x0)) {
+ bool have_bels = false;
+ for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i, t) > 0) {
+ have_bels = true;
+ break;
+ }
+ }
+ }
+
+ if (have_bels)
+ break;
+
+ trimmed_r--;
+ }
+ // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r);
+ if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r))
+ return {};
+ // Now find the initial target cut that minimises utilisation imbalance, whilst
+ // meeting the clearance requirements for any large macros
+ std::vector<int> left_cells_v(buckets.size(), 0), right_cells_v(buckets.size(), 0);
+ std::vector<int> left_bels_v(buckets.size(), 0), right_bels_v(r.bels);
+ for (int i = 0; i <= pivot; i++)
+ left_cells_v.at(cell_index(*cut_cells.at(i))) +=
+ p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1;
+ for (int i = pivot + 1; i < int(cut_cells.size()); i++)
+ right_cells_v.at(cell_index(*cut_cells.at(i))) +=
+ p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1;
+
+ int best_tgt_cut = -1;
+ double best_deltaU = std::numeric_limits<double>::max();
+ // std::pair<int, int> target_cut_bels;
+ std::vector<int> slither_bels(buckets.size(), 0);
+ for (int i = trimmed_l; i <= trimmed_r; i++) {
+ for (size_t t = 0; t < buckets.size(); t++)
+ slither_bels.at(t) = 0;
+ for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) {
+ for (size_t t = 0; t < buckets.size(); t++)
+ slither_bels.at(t) += dir ? bels_at(j, i, t) : bels_at(i, j, t);
+ }
+ for (size_t t = 0; t < buckets.size(); t++) {
+ left_bels_v.at(t) += slither_bels.at(t);
+ right_bels_v.at(t) -= slither_bels.at(t);
+ }
+
+ if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) {
+ // Solution is potentially valid
+ double aU = 0;
+ for (size_t t = 0; t < buckets.size(); t++)
+ aU += (left_cells_v.at(t) + right_cells_v.at(t)) *
+ std::abs(double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) -
+ double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1)));
+ if (aU < best_deltaU) {
+ best_deltaU = aU;
+ best_tgt_cut = i;
+ }
+ }
+ }
+ if (best_tgt_cut == -1)
+ return {};
+ // left_bels = target_cut_bels.first;
+ // right_bels = target_cut_bels.second;
+ for (size_t t = 0; t < buckets.size(); t++) {
+ left_bels_v.at(t) = 0;
+ right_bels_v.at(t) = 0;
+ }
+ for (int x = r.x0; x <= (dir ? r.x1 : best_tgt_cut); x++)
+ for (int y = r.y0; y <= (dir ? best_tgt_cut : r.y1); y++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ left_bels_v.at(t) += bels_at(x, y, t);
+ }
+ }
+ for (int x = dir ? r.x0 : (best_tgt_cut + 1); x <= r.x1; x++)
+ for (int y = dir ? (best_tgt_cut + 1) : r.y0; y <= r.y1; y++) {
+ for (size_t t = 0; t < buckets.size(); t++) {
+ right_bels_v.at(t) += bels_at(x, y, t);
+ }
+ }
+ if (std::accumulate(left_bels_v.begin(), left_bels_v.end(), 0) == 0 ||
+ std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0) == 0)
+ return {};
+
+ // Perturb the source cut to eliminate overutilisation
+ auto is_part_overutil = [&](bool r) {
+ double delta = 0;
+ for (size_t t = 0; t < left_cells_v.size(); t++) {
+ delta += double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) -
+ double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1));
+ }
+ return r ? delta < 0 : delta > 0;
+ };
+ while (pivot > 0 && is_part_overutil(false)) {
+ auto &move_cell = cut_cells.at(pivot);
+ int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1;
+ left_cells_v.at(cell_index(*cut_cells.at(pivot))) -= size;
+ right_cells_v.at(cell_index(*cut_cells.at(pivot))) += size;
+ pivot--;
+ }
+ while (pivot < int(cut_cells.size()) - 1 && is_part_overutil(true)) {
+ auto &move_cell = cut_cells.at(pivot + 1);
+ int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1;
+ left_cells_v.at(cell_index(*move_cell)) += size;
+ right_cells_v.at(cell_index(*move_cell)) -= size;
+ pivot++;
+ }
+
+ // Split regions into bins, and then spread cells by linear interpolation within those bins
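+ // e.g. with 25 cells and a target span of [10, 20], up to K=10 bins are formed; within each bin the
+ // cells' raw positions are mapped linearly from their original span onto that bin's share of the
+ // target span, preserving their relative order.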
+ auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) {
+ int N = cells_end - cells_start;
+ if (N <= 2) {
+ for (int i = cells_start; i < cells_end; i++) {
+ auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy
+ : p->cell_locs.at(cut_cells.at(i)->name).rawx;
+ pos = area_l + (i - cells_start) * ((area_r - area_l) / N);
+ }
+ return;
+ }
+ // Split region into up to 10 (K) bins
+ int K = std::min<int>(N, 10);
+ std::vector<std::pair<int, double>> bin_bounds; // [(cell start, area start)]
+ bin_bounds.emplace_back(cells_start, area_l);
+ for (int i = 1; i < K; i++)
+ bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K);
+ bin_bounds.emplace_back(cells_end, area_r + 0.99);
+ for (int i = 0; i < K; i++) {
+ auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1);
+ double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy
+ : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx;
+ double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy
+ : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx;
+ double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left);
+ for (int j = bl.first; j < br.first; j++) {
+ Region *cr = cut_cells.at(j)->region;
+ if (cr != nullptr) {
+ // Limit spreading bounds to constraint region; if applicable
+ double brsc = p->limit_to_reg(cr, br.second, dir);
+ double blsc = p->limit_to_reg(cr, bl.second, dir);
+ double mr = (brsc - blsc) / std::max(0.00001, orig_right - orig_left);
+ auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy
+ : p->cell_locs.at(cut_cells.at(j)->name).rawx;
+ NPNR_ASSERT(pos >= orig_left && pos <= orig_right);
+ pos = blsc + mr * (pos - orig_left);
+ } else {
+ auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy
+ : p->cell_locs.at(cut_cells.at(j)->name).rawx;
+ NPNR_ASSERT(pos >= orig_left && pos <= orig_right);
+ pos = bl.second + m * (pos - orig_left);
+ }
+ }
+ }
+ };
+ spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut);
+ spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r);
+ // Update various data structures
+ for (int x = r.x0; x <= r.x1; x++)
+ for (int y = r.y0; y <= r.y1; y++) {
+ cells_at_location.at(x).at(y).clear();
+ }
+ for (auto cell : cut_cells) {
+ auto &cl = p->cell_locs.at(cell->name);
+ cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx)));
+ cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy)));
+ cells_at_location.at(cl.x).at(cl.y).push_back(cell);
+ }
+ SpreaderRegion rl, rr;
+ rl.id = int(regions.size());
+ rl.x0 = r.x0;
+ rl.y0 = r.y0;
+ rl.x1 = dir ? r.x1 : best_tgt_cut;
+ rl.y1 = dir ? best_tgt_cut : r.y1;
+ rl.cells = left_cells_v;
+ rl.bels = left_bels_v;
+ rr.id = int(regions.size()) + 1;
+ rr.x0 = dir ? r.x0 : (best_tgt_cut + 1);
+ rr.y0 = dir ? (best_tgt_cut + 1) : r.y0;
+ rr.x1 = r.x1;
+ rr.y1 = r.y1;
+ rr.cells = right_cells_v;
+ rr.bels = right_bels_v;
+ regions.push_back(rl);
+ regions.push_back(rr);
+ for (int x = rl.x0; x <= rl.x1; x++)
+ for (int y = rl.y0; y <= rl.y1; y++)
+ groups.at(x).at(y) = rl.id;
+ for (int x = rr.x0; x <= rr.x1; x++)
+ for (int y = rr.y0; y <= rr.y1; y++)
+ groups.at(x).at(y) = rr.id;
+ return std::make_pair(rl.id, rr.id);
+ };
+ };
+ typedef decltype(CellInfo::udata) cell_udata_t;
+ cell_udata_t dont_solve = std::numeric_limits<cell_udata_t>::max();
+};
+int HeAPPlacer::CutSpreader::seq = 0;
+
+bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); }
+
+PlacerHeapCfg::PlacerHeapCfg(Context *ctx)
+{
+ alpha = ctx->setting<float>("placerHeap/alpha");
+ beta = ctx->setting<float>("placerHeap/beta");
+ criticalityExponent = ctx->setting<int>("placerHeap/criticalityExponent");
+ timingWeight = ctx->setting<int>("placerHeap/timingWeight");
+ parallelRefine = ctx->setting<bool>("placerHeap/parallelRefine", false);
+
+ timing_driven = ctx->setting<bool>("timing_driven");
+ solverTolerance = 1e-5;
+ placeAllAtOnce = false;
+
+ hpwl_scale_x = 1;
+ hpwl_scale_y = 1;
+ spread_scale_x = 1;
+ spread_scale_y = 1;
+}
+
+NEXTPNR_NAMESPACE_END
+
+#else
+
+#include "log.h"
+#include "nextpnr.h"
+#include "placer_heap.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+bool placer_heap(Context *ctx, PlacerHeapCfg cfg)
+{
+ log_error("nextpnr was built without the HeAP placer\n");
+ return false;
+}
+
+PlacerHeapCfg::PlacerHeapCfg(Context *ctx) {}
+
+NEXTPNR_NAMESPACE_END
+
+#endif
diff --git a/common/place/placer_heap.h b/common/place/placer_heap.h
new file mode 100644
index 00000000..9c62869e
--- /dev/null
+++ b/common/place/placer_heap.h
@@ -0,0 +1,58 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2019 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * [[cite]] HeAP
+ * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson
+ * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf
+ *
+ * [[cite]] SimPL
+ * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov
+ * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf
+ */
+
+#ifndef PLACER_HEAP_H
+#define PLACER_HEAP_H
+#include "log.h"
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct PlacerHeapCfg
+{
+ PlacerHeapCfg(Context *ctx);
+
+ float alpha, beta;
+ float criticalityExponent;
+ float timingWeight;
+ bool timing_driven;
+ float solverTolerance;
+ bool placeAllAtOnce;
+ bool parallelRefine;
+
+ int hpwl_scale_x, hpwl_scale_y;
+ int spread_scale_x, spread_scale_y;
+
+ // These cell types will be randomly locked to prevent singular matrices
+ pool<IdString> ioBufTypes;
+ // These cell types are part of the same unit (e.g. slices split into
+ // components) so will always be spread together
+ std::vector<pool<BelBucketId>> cellGroups;
+};
+
+extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg);
+NEXTPNR_NAMESPACE_END
+#endif
diff --git a/common/place/timing_opt.cc b/common/place/timing_opt.cc
new file mode 100644
index 00000000..f9246292
--- /dev/null
+++ b/common/place/timing_opt.cc
@@ -0,0 +1,561 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+ * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells
+ * on a critical path
+ *
+ * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs"
+ * https://www.cerc.utexas.edu/utda/publications/C205.pdf
+ *
+ * Modifications have been made to deal with the smaller Bels that nextpnr uses instead of swapping whole
+ * tiles, and to deal with the fact that not every cell on the crit path may be swappable.
+ */
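+
+/*
+ * Rough flow: find_crit_paths() extracts near-critical paths, find_neighbours() collects a small pool
+ * of candidate bels around each movable path cell, and optimise_path() then runs a BFS over
+ * (path position, candidate bel) pairs, tracking cumulative arc delay plus a backtrace of swaps and
+ * only keeping moves that pass acceptable_move().
+ */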
+
+#include "timing_opt.h"
+#include <boost/range/adaptor/reversed.hpp>
+#include <queue>
+#include "nextpnr.h"
+#include "timing.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+class TimingOptimiser
+{
+ public:
+ TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg), tmg(ctx){};
+ bool optimise()
+ {
+ log_info("Running timing-driven placement optimisation...\n");
+ ctx->lock();
+ if (ctx->verbose)
+ timing_analysis(ctx, false, true, false, false);
+ tmg.setup();
+ for (int i = 0; i < 30; i++) {
+ log_info(" Iteration %d...\n", i);
+ tmg.run();
+ setup_delay_limits();
+ auto crit_paths = find_crit_paths(0.98, 50000);
+ for (auto &path : crit_paths)
+ optimise_path(path);
+ if (ctx->verbose)
+ timing_analysis(ctx, false, true, false, false);
+ }
+ ctx->unlock();
+ return true;
+ }
+
+ private:
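+ // setup_delay_limits() gives each sink a delay budget: its current routed delay plus a tenth of the
+ // slack it has over the domain's worst slack, so less-critical arcs are allowed to degrade slightly.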
+ void setup_delay_limits()
+ {
+ max_net_delay.clear();
+ for (auto &net : ctx->nets) {
+ NetInfo *ni = net.second.get();
+ if (ni->driver.cell == nullptr)
+ continue;
+ for (auto usr : ni->users) {
+ max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max();
+ }
+ for (auto usr : ni->users) {
+ delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr);
+ delay_t slack = tmg.get_setup_slack(CellPortKey(usr));
+ delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr));
+ if (slack == std::numeric_limits<delay_t>::max())
+ continue;
+ max_net_delay[std::make_pair(usr.cell->name, usr.port)] = net_delay + ((slack - domain_slack) / 10);
+ }
+ }
+ }
+
+ bool check_cell_delay_limits(CellInfo *cell)
+ {
+ for (const auto &port : cell->ports) {
+ int nc;
+ if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE)
+ continue;
+ NetInfo *net = port.second.net;
+ if (net == nullptr)
+ continue;
+ if (port.second.type == PORT_IN) {
+ if (net->driver.cell == nullptr || net->driver.cell->bel == BelId())
+ continue;
+ for (auto user : net->users) {
+ if (user.cell == cell && user.port == port.first) {
+ if (ctx->predictArcDelay(net, user) >
+ 1.1 * max_net_delay.at(std::make_pair(cell->name, port.first)))
+ return false;
+ }
+ }
+
+ } else if (port.second.type == PORT_OUT) {
+ for (auto user : net->users) {
+ // This could get expensive for high-fanout nets??
+ BelId dstBel = user.cell->bel;
+ if (dstBel == BelId())
+ continue;
+ if (ctx->predictArcDelay(net, user) >
+ 1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) {
+
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ BelId cell_swap_bel(CellInfo *cell, BelId newBel)
+ {
+ BelId oldBel = cell->bel;
+ if (oldBel == newBel)
+ return oldBel;
+ CellInfo *other_cell = ctx->getBoundBelCell(newBel);
+ NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK);
+ ctx->unbindBel(oldBel);
+ if (other_cell != nullptr) {
+ ctx->unbindBel(newBel);
+ ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK);
+ }
+ ctx->bindBel(newBel, cell, STRENGTH_WEAK);
+ return oldBel;
+ }
+
+ // Check that a series of moves are both legal and remain within maximum delay bounds
+ // Moves are specified as a vector of pairs <cell, oldBel>
+ bool acceptable_move(std::vector<std::pair<CellInfo *, BelId>> &move, bool check_delays = true)
+ {
+ for (auto &entry : move) {
+ if (!ctx->isBelLocationValid(entry.first->bel))
+ return false;
+ if (!ctx->isBelLocationValid(entry.second))
+ return false;
+ if (!check_delays)
+ continue;
+ if (!check_cell_delay_limits(entry.first))
+ return false;
+ // We might have swapped another cell onto the original bel. Check this for max delay violations
+ // too
+ CellInfo *swapped = ctx->getBoundBelCell(entry.second);
+ if (swapped != nullptr && !check_cell_delay_limits(swapped))
+ return false;
+ }
+ return true;
+ }
+
+ int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap)
+ {
+ BelId curr = cell->bel;
+ Loc curr_loc = ctx->getBelLocation(curr);
+ int found_count = 0;
+ cell_neighbour_bels[cell->name] = pool<BelId>{};
+ for (int dy = -d; dy <= d; dy++) {
+ for (int dx = -d; dx <= d; dx++) {
+ // Go through all the Bels at this location
+ // First, find all bels of the correct type that are either unbound or bound normally
+ // Strongly bound bels are ignored
+ // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros
+ std::vector<BelId> free_bels_at_loc;
+ std::vector<BelId> bound_bels_at_loc;
+ for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) {
+ if (!ctx->isValidBelForCellType(cell->type, bel))
+ continue;
+ CellInfo *bound = ctx->getBoundBelCell(bel);
+ if (bound == nullptr) {
+ free_bels_at_loc.push_back(bel);
+ } else if (bound->belStrength <= STRENGTH_WEAK && bound->cluster == ClusterId()) {
+ bound_bels_at_loc.push_back(bel);
+ }
+ }
+ BelId candidate;
+
+ while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) {
+ BelId try_bel;
+ if (!free_bels_at_loc.empty()) {
+ int try_idx = ctx->rng(int(free_bels_at_loc.size()));
+ try_bel = free_bels_at_loc.at(try_idx);
+ free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx);
+ } else {
+ int try_idx = ctx->rng(int(bound_bels_at_loc.size()));
+ try_bel = bound_bels_at_loc.at(try_idx);
+ bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx);
+ }
+ if (bel_candidate_cells.count(try_bel) && !allow_swap) {
+ // Overlap is only allowed if it is with the previous cell (this is handled by removing those
+ // edges in the graph), or if allow_swap is true to deal with cases where overlap means few
+ // neighbours are identified
+ if (bel_candidate_cells.at(try_bel).size() > 1 ||
+ (bel_candidate_cells.at(try_bel).size() == 1 &&
+ *(bel_candidate_cells.at(try_bel).begin()) != prev_cell))
+ continue;
+ }
+ // TODO: what else to check here?
+ candidate = try_bel;
+ break;
+ }
+
+ if (candidate != BelId()) {
+ cell_neighbour_bels[cell->name].insert(candidate);
+ bel_candidate_cells[candidate].insert(cell->name);
+ // Work out if we need to delete any overlap
+ std::vector<IdString> overlap;
+ for (auto other : bel_candidate_cells[candidate])
+ if (other != cell->name && other != prev_cell)
+ overlap.push_back(other);
+ if (overlap.size() > 0)
+ NPNR_ASSERT(allow_swap);
+ for (auto ov : overlap) {
+ bel_candidate_cells[candidate].erase(ov);
+ cell_neighbour_bels[ov].erase(candidate);
+ }
+ }
+ }
+ }
+ return found_count;
+ }
+
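+ // Select up to max_count nets whose most critical sink exceeds crit_thresh, then walk each one
+ // backwards and forwards along the most critical unused combinational arcs to assemble full paths.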
+ std::vector<std::vector<PortRef *>> find_crit_paths(float crit_thresh, size_t max_count)
+ {
+ std::vector<std::vector<PortRef *>> crit_paths;
+ std::vector<std::pair<NetInfo *, store_index<PortRef>>> crit_nets;
+ std::vector<IdString> netnames;
+ std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames),
+ [](const std::pair<IdString, std::unique_ptr<NetInfo>> &kv) { return kv.first; });
+ ctx->sorted_shuffle(netnames);
+ for (auto net : netnames) {
+ if (crit_nets.size() >= max_count)
+ break;
+ float highest_crit = 0;
+ store_index<PortRef> crit_user_idx{};
+ NetInfo *ni = ctx->nets.at(net).get();
+ for (auto usr : ni->users.enumerate()) {
+ float crit = tmg.get_criticality(CellPortKey(usr.value));
+ if (crit > highest_crit) {
+ highest_crit = crit;
+ crit_user_idx = usr.index;
+ }
+ }
+ if (highest_crit > crit_thresh)
+ crit_nets.emplace_back(ni, crit_user_idx);
+ }
+
+ pool<PortRef *, hash_ptr_ops> used_ports;
+
+ for (auto crit_net : crit_nets) {
+
+ if (used_ports.count(&(crit_net.first->users.at(crit_net.second))))
+ continue;
+
+ std::deque<PortRef *> crit_path;
+
+ // FIXME: This will fail badly on combinational loops
+
+ // Iterate backwards following greatest criticality
+ NetInfo *back_cursor = crit_net.first;
+ while (back_cursor != nullptr) {
+ float max_crit = 0;
+ std::pair<NetInfo *, store_index<PortRef>> crit_sink{nullptr, {}};
+ CellInfo *cell = back_cursor->driver.cell;
+ if (cell == nullptr)
+ break;
+ for (auto port : cell->ports) {
+ if (port.second.type != PORT_IN)
+ continue;
+ NetInfo *pn = port.second.net;
+ if (pn == nullptr)
+ continue;
+ int ccount;
+ DelayQuad combDelay;
+ TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
+ if (tpclass != TMG_COMB_INPUT)
+ continue;
+ bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay);
+ if (!is_path)
+ continue;
+ float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first));
+ if (used_ports.count(&(pn->users.at(port.second.user_idx))))
+ continue;
+ if (usr_crit >= max_crit) {
+ max_crit = usr_crit;
+ crit_sink = std::make_pair(pn, port.second.user_idx);
+ }
+ }
+
+ if (crit_sink.first != nullptr) {
+ crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second)));
+ used_ports.insert(&(crit_sink.first->users.at(crit_sink.second)));
+ }
+ back_cursor = crit_sink.first;
+ }
+ // Iterate forwards following greatest criticality
+ PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second));
+ while (fwd_cursor != nullptr) {
+ crit_path.push_back(fwd_cursor);
+ float max_crit = 0;
+ std::pair<NetInfo *, store_index<PortRef>> crit_sink{nullptr, {}};
+ CellInfo *cell = fwd_cursor->cell;
+ for (auto port : cell->ports) {
+ if (port.second.type != PORT_OUT)
+ continue;
+ NetInfo *pn = port.second.net;
+ if (pn == nullptr)
+ continue;
+
+ int ccount;
+ DelayQuad combDelay;
+ TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
+ if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT)
+ continue;
+ bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay);
+ if (!is_path)
+ continue;
+ for (auto usr : pn->users.enumerate()) {
+ if (used_ports.count(&(pn->users.at(usr.index))))
+ continue;
+ float crit = tmg.get_criticality(CellPortKey(usr.value));
+ if (crit >= max_crit) {
+ max_crit = crit;
+ crit_sink = std::make_pair(pn, usr.index);
+ }
+ }
+ }
+ if (crit_sink.first != nullptr) {
+ fwd_cursor = &(crit_sink.first->users.at(crit_sink.second));
+ used_ports.insert(&(crit_sink.first->users.at(crit_sink.second)));
+ } else {
+ fwd_cursor = nullptr;
+ }
+ }
+
+ std::vector<PortRef *> crit_path_vec;
+ std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec));
+ crit_paths.push_back(crit_path_vec);
+ }
+
+ return crit_paths;
+ }
+
+ void optimise_path(std::vector<PortRef *> &path)
+ {
+ path_cells.clear();
+ cell_neighbour_bels.clear();
+ bel_candidate_cells.clear();
+ if (ctx->debug)
+ log_info("Optimising the following path: \n");
+
+ auto front_port = path.front();
+ NetInfo *front_net = front_port->cell->ports.at(front_port->port).net;
+ if (front_net != nullptr && front_net->driver.cell != nullptr) {
+ auto front_cell = front_net->driver.cell;
+ if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) &&
+ front_cell->cluster == ClusterId()) {
+ path_cells.push_back(front_cell->name);
+ }
+ }
+
+ for (auto port : path) {
+ if (ctx->debug) {
+ float crit = tmg.get_criticality(CellPortKey(*port));
+ log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx),
+ ctx->nameOfBel(port->cell->bel), crit);
+ }
+ if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end())
+ continue;
+ if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) ||
+ port->cell->cluster != ClusterId())
+ continue;
+ if (ctx->debug)
+ log_info(" can move\n");
+ path_cells.push_back(port->cell->name);
+ }
+
+ if (path_cells.size() < 2) {
+ if (ctx->debug) {
+ log_info("Too few moveable cells; skipping path\n");
+ log_break();
+ }
+
+ return;
+ }
+
+ // Calculate original delay before touching anything
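+ // (path entries are net users, i.e. sink ports, so each arc is predicted once from its sink side)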
+ delay_t original_delay = 0;
+
+ for (size_t i = 0; i < path.size(); i++) {
+ auto &port = path.at(i)->cell->ports.at(path.at(i)->port);
+ NetInfo *pn = port.net;
+ if (port.user_idx)
+ original_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx));
+ }
+
+ IdString last_cell;
+ const int d = 2; // FIXME: how to best determine d
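+ // Build the search space: for each path cell, collect candidate BELs within distance d of its
+ // current location, linked back to the cells that may occupy them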
+ for (auto cell : path_cells) {
+ // FIXME: when should we allow swapping due to a lack of candidates
+ find_neighbours(ctx->cells[cell].get(), last_cell, d, false);
+ last_cell = cell;
+ }
+
+ if (ctx->debug) {
+ for (auto cell : path_cells) {
+ log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), ctx->nameOfBel(ctx->cells[cell]->bel));
+ for (auto neigh : cell_neighbour_bels.at(cell)) {
+ log_info(" %s\n", ctx->nameOfBel(neigh));
+ }
+ }
+ }
+
+ // Actual BFS path optimisation algorithm
+ dict<IdString, dict<BelId, delay_t>> cumul_costs;
+ dict<std::pair<IdString, BelId>, std::pair<IdString, BelId>> backtrace;
+ std::queue<std::pair<int, BelId>> visit;
+ pool<std::pair<int, BelId>> to_visit;
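+ // BFS states are (index into path_cells, candidate BEL) pairs. cumul_costs records the best
+ // prefix delay found so far for placing each path cell on each candidate BEL, and backtrace
+ // records the predecessor assignment so a complete placement can be replayed later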
+
+ for (auto startbel : cell_neighbour_bels[path_cells.front()]) {
+ // Swap for legality check
+ CellInfo *cell = ctx->cells.at(path_cells.front()).get();
+ BelId origBel = cell_swap_bel(cell, startbel);
+ std::vector<std::pair<CellInfo *, BelId>> move{std::make_pair(cell, origBel)};
+ if (acceptable_move(move)) {
+ auto entry = std::make_pair(0, startbel);
+ visit.push(entry);
+ cumul_costs[path_cells.front()][startbel] = 0;
+ }
+ // Swap back
+ cell_swap_bel(cell, origBel);
+ }
+
+ while (!visit.empty()) {
+ auto entry = visit.front();
+ visit.pop();
+ auto cellname = path_cells.at(entry.first);
+ if (entry.first == int(path_cells.size()) - 1)
+ continue;
+ std::vector<std::pair<CellInfo *, BelId>> move;
+ // Apply the entire backtrace for accurate legality and delay checks
+ // This is probably pretty expensive (but also probably pales in comparison to the number of swaps
+ // SA will make...)
+ std::vector<std::pair<IdString, BelId>> route_to_entry;
+ auto cursor = std::make_pair(cellname, entry.second);
+ route_to_entry.push_back(cursor);
+ while (backtrace.count(cursor)) {
+ cursor = backtrace.at(cursor);
+ route_to_entry.push_back(cursor);
+ }
+ for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) {
+ CellInfo *cell = ctx->cells.at(rt_entry.first).get();
+ BelId origBel = cell_swap_bel(cell, rt_entry.second);
+ move.push_back(std::make_pair(cell, origBel));
+ }
+
+ // Have a look at where we can travel from here
+ for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) {
+ // The next cell cannot use the BEL occupied by the current path cell in this state;
+ // such overlapping edges are skipped
+ if (neighbour == entry.second)
+ continue;
+ // Experimentally swap the next path cell onto the neighbour bel we are trying
+ IdString ncname = path_cells.at(entry.first + 1);
+ CellInfo *next_cell = ctx->cells.at(ncname).get();
+ BelId origBel = cell_swap_bel(next_cell, neighbour);
+ move.push_back(std::make_pair(next_cell, origBel));
+
+ delay_t total_delay = 0;
+
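+ // total_delay is the prefix cost: the predicted delay of every arc up to and including the
+ // arc into next_cell; arcs further along the path are costed when later path cells are
+ // expanded with this placement replayed from the backtrace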
+ for (size_t i = 0; i < path.size(); i++) {
+ auto &port = path.at(i)->cell->ports.at(path.at(i)->port);
+ NetInfo *pn = port.net;
+ if (port.user_idx)
+ total_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx));
+ if (path.at(i)->cell == next_cell)
+ break;
+ }
+
+ // First, check if the move is actually worthwhile from a delay point of view before the expensive
+ // legality check
+ if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) ||
+ cumul_costs.at(ncname).at(neighbour) > total_delay) {
+ // Now check that the swaps we have made to get here are legal and meet max delay requirements
+ if (acceptable_move(move)) {
+ cumul_costs[ncname][neighbour] = total_delay;
+ backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second);
+ if (!to_visit.count(std::make_pair(entry.first + 1, neighbour)))
+ visit.push(std::make_pair(entry.first + 1, neighbour));
+ }
+ }
+ // Revert the experimental swap
+ cell_swap_bel(move.back().first, move.back().second);
+ move.pop_back();
+ }
+
+ // Revert move by swapping cells back to their original order
+ // Execute swaps in reverse order to how we made them originally
+ for (auto move_entry : boost::adaptors::reverse(move)) {
+ cell_swap_bel(move_entry.first, move_entry.second);
+ }
+ }
+
+ // Did we find a solution??
+ if (cumul_costs.count(path_cells.back())) {
+ // Find the end position with the lowest total delay
+ auto &end_options = cumul_costs.at(path_cells.back());
+ auto lowest = std::min_element(end_options.begin(), end_options.end(),
+ [](const std::pair<BelId, delay_t> &a, const std::pair<BelId, delay_t> &b) {
+ return a.second < b.second;
+ });
+ NPNR_ASSERT(lowest != end_options.end());
+
+ std::vector<std::pair<IdString, BelId>> route_to_solution;
+ auto cursor = std::make_pair(path_cells.back(), lowest->first);
+ route_to_solution.push_back(cursor);
+ while (backtrace.count(cursor)) {
+ cursor = backtrace.at(cursor);
+ route_to_solution.push_back(cursor);
+ }
+ if (ctx->debug)
+ log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n",
+ ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay));
+ for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) {
+ CellInfo *cell = ctx->cells.at(rt_entry.first).get();
+ cell_swap_bel(cell, rt_entry.second);
+ if (ctx->debug)
+ log_info(" %s at %s\n", rt_entry.first.c_str(ctx), ctx->nameOfBel(rt_entry.second));
+ }
+
+ } else {
+ if (ctx->debug)
+ log_info("Solution was not found\n");
+ }
+ if (ctx->debug)
+ log_break();
+ }
+
+ // Current candidate Bels for cells (linked in both directions)
+ std::vector<IdString> path_cells;
+ dict<IdString, pool<BelId>> cell_neighbour_bels;
+ dict<BelId, pool<IdString>> bel_candidate_cells;
+ // Map cell ports to net delay limit
+ dict<std::pair<IdString, IdString>, delay_t> max_net_delay;
+ Context *ctx;
+ TimingOptCfg cfg;
+ TimingAnalyser tmg;
+};
+
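+ // Illustrative usage from an arch-specific pass (a sketch; the cell type name below is
+ // hypothetical and architecture-dependent):
+ //
+ //   TimingOptCfg cfg(ctx);
+ //   cfg.cellTypes.insert(ctx->id("LOGIC_CELL")); // only the arch's logic cells will be moved
+ //   timing_opt(ctx, cfg);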
+bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); }
+
+NEXTPNR_NAMESPACE_END
diff --git a/common/place/timing_opt.h b/common/place/timing_opt.h
new file mode 100644
index 00000000..8f8bc709
--- /dev/null
+++ b/common/place/timing_opt.h
@@ -0,0 +1,37 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2018 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "log.h"
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct TimingOptCfg
+{
+ TimingOptCfg(Context *ctx) {}
+
+ // The timing optimiser will *only* optimise cells of these types
+ // Normally these would only be logic cells (or tiles if applicable), as the algorithm makes
+ // little sense for other cell types
+ pool<IdString> cellTypes;
+};
+
+extern bool timing_opt(Context *ctx, TimingOptCfg cfg);
+
+NEXTPNR_NAMESPACE_END