aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--common/place/detail_place_cfg.h36
-rw-r--r--common/place/detail_place_core.cc457
-rw-r--r--common/place/detail_place_core.h224
-rw-r--r--common/place/parallel_refine.cc552
-rw-r--r--common/place/parallel_refine.h6
5 files changed, 730 insertions, 545 deletions
diff --git a/common/place/detail_place_cfg.h b/common/place/detail_place_cfg.h
new file mode 100644
index 00000000..dadda0e8
--- /dev/null
+++ b/common/place/detail_place_cfg.h
@@ -0,0 +1,36 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef DETAIL_PLACE_CFG_H
+#define DETAIL_PLACE_CFG_H
+
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct DetailPlaceCfg
+{
+ DetailPlaceCfg(Context *ctx);
+ bool timing_driven;
+ int hpwl_scale_x, hpwl_scale_y;
+ float crit_exp = 8;
+};
+
+NEXTPNR_NAMESPACE_END
+#endif
diff --git a/common/place/detail_place_core.cc b/common/place/detail_place_core.cc
new file mode 100644
index 00000000..bb383639
--- /dev/null
+++ b/common/place/detail_place_core.cc
@@ -0,0 +1,457 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "detail_place_core.h"
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+DetailPlaceCfg::DetailPlaceCfg(Context *ctx)
+{
+ timing_driven = ctx->setting<bool>("timing_driven");
+
+ hpwl_scale_x = 1;
+ hpwl_scale_y = 1;
+}
+
+PlacePartition::PlacePartition(Context *ctx)
+{
+ x0 = ctx->getGridDimX();
+ y0 = ctx->getGridDimY();
+ x1 = 0;
+ y1 = 0;
+ for (auto &cell : ctx->cells) {
+ Loc l = ctx->getBelLocation(cell.second->bel);
+ x0 = std::min(x0, l.x);
+ x1 = std::max(x1, l.x);
+ y0 = std::min(y0, l.y);
+ y1 = std::max(y1, l.y);
+ cells.push_back(cell.second.get());
+ }
+}
+
+void PlacePartition::split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r)
+{
+ std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) {
+ Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel);
+ return yaxis ? (l0.y < l1.y) : (l0.x < l1.x);
+ });
+ size_t pivot_point = size_t(cells.size() * pivot);
+ l.cells.clear();
+ r.cells.clear();
+ l.cells.reserve(pivot_point);
+ r.cells.reserve(cells.size() - pivot_point);
+ int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1)
+ : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y
+ : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x);
+ for (size_t i = 0; i < cells.size(); i++) {
+ Loc loc = ctx->getBelLocation(cells.at(i)->bel);
+ ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i));
+ }
+ if (yaxis) {
+ l.x0 = r.x0 = x0;
+ l.x1 = r.x1 = x1;
+ l.y0 = y0;
+ l.y1 = pivot_coord;
+ r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1);
+ r.y1 = y1;
+ } else {
+ l.y0 = r.y0 = y0;
+ l.y1 = r.y1 = y1;
+ l.x0 = x0;
+ l.x1 = pivot_coord;
+ r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1);
+ r.x1 = x1;
+ }
+}
+
+void DetailPlacerState::update_global_costs()
+{
+ last_bounds.resize(flat_nets.size());
+ last_tmg_costs.resize(flat_nets.size());
+ total_wirelen = 0;
+ total_timing_cost = 0;
+ for (size_t i = 0; i < flat_nets.size(); i++) {
+ NetInfo *ni = flat_nets.at(i);
+ if (skip_net(ni))
+ continue;
+ last_bounds.at(i) = NetBB::compute(ctx, ni);
+ total_wirelen += last_bounds.at(i).hpwl(base_cfg);
+ if (!timing_skip_net(ni)) {
+ auto &tc = last_tmg_costs.at(i);
+ tc.resize(ni->users.capacity());
+ for (auto usr : ni->users.enumerate()) {
+ tc.at(usr.index.idx()) = get_timing_cost(ni, usr.index);
+ total_timing_cost += tc.at(usr.index.idx());
+ }
+ }
+ }
+}
+
+NetBB NetBB::compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel)
+{
+ NetBB result{};
+ if (!net->driver.cell)
+ return result;
+ auto bel_loc = [&](const CellInfo *cell) {
+ BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel;
+ return ctx->getBelLocation(bel);
+ };
+ result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1;
+ Loc drv_loc = bel_loc(net->driver.cell);
+ result.x0 = result.x1 = drv_loc.x;
+ result.y0 = result.y1 = drv_loc.y;
+ for (auto &usr : net->users) {
+ Loc l = bel_loc(usr.cell);
+ if (l.x == result.x0)
+ ++result.nx0; // on the edge
+ else if (l.x < result.x0) {
+ result.x0 = l.x; // extends the edge
+ result.nx0 = 1;
+ }
+ if (l.x == result.x1)
+ ++result.nx1; // on the edge
+ else if (l.x > result.x1) {
+ result.x1 = l.x; // extends the edge
+ result.nx1 = 1;
+ }
+ if (l.y == result.y0)
+ ++result.ny0; // on the edge
+ else if (l.y < result.y0) {
+ result.y0 = l.y; // extends the edge
+ result.ny0 = 1;
+ }
+ if (l.y == result.y1)
+ ++result.ny1; // on the edge
+ else if (l.y > result.y1) {
+ result.y1 = l.y; // extends the edge
+ result.ny1 = 1;
+ }
+ }
+ return result;
+}
+
+void DetailPlacerThreadState::set_partition(const PlacePartition &part)
+{
+ p = part;
+ thread_nets.clear();
+ thread_net_idx.resize(g.flat_nets.size());
+ std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1);
+ // Determine the set of nets that are within the thread; and therefore we care about
+ for (auto thread_cell : part.cells) {
+ for (auto &port : thread_cell->ports) {
+ if (!port.second.net)
+ continue;
+ int global_idx = port.second.net->udata;
+ auto &thread_idx = thread_net_idx.at(global_idx);
+ // Already added to the set
+ if (thread_idx != -1)
+ continue;
+ thread_idx = thread_nets.size();
+ thread_nets.push_back(port.second.net);
+ }
+ }
+ tmg_ignored_nets.clear();
+ ignored_nets.clear();
+ for (auto tn : thread_nets) {
+ ignored_nets.push_back(g.skip_net(tn));
+ tmg_ignored_nets.push_back(g.timing_skip_net(tn));
+ }
+ // Set up the original cell-bel map for all nets inside the thread
+ local_cell2bel.clear();
+ for (NetInfo *net : thread_nets) {
+ if (net->driver.cell)
+ local_cell2bel[net->driver.cell->name] = net->driver.cell->bel;
+ for (auto &usr : net->users)
+ local_cell2bel[usr.cell->name] = usr.cell->bel;
+ }
+}
+
+void DetailPlacerThreadState::setup_initial_state()
+{
+ // Setup initial net bounding boxes and timing costs
+ net_bounds.clear();
+ arc_tmg_cost.clear();
+ for (auto tn : thread_nets) {
+ net_bounds.push_back(g.last_bounds.at(tn->udata));
+ arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata));
+ }
+ new_net_bounds = net_bounds;
+ for (int j = 0; j < 2; j++) {
+ auto &a = axes.at(j);
+ a.already_bounds_changed.resize(net_bounds.size());
+ }
+ already_timing_changed.clear();
+ already_timing_changed.resize(net_bounds.size());
+ for (size_t i = 0; i < thread_nets.size(); i++)
+ already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity());
+}
+
+bool DetailPlacerThreadState::bounds_check(BelId bel)
+{
+ Loc l = ctx->getBelLocation(bel);
+ if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1)
+ return false;
+ return true;
+}
+
+bool DetailPlacerThreadState::bind_move()
+{
+#if !defined(__wasm)
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ for (auto &entry : moved_cells) {
+ ctx->unbindBel(entry.second.first);
+ }
+ bool success = true;
+ for (auto &entry : moved_cells) {
+ // Make sure targets are available before we bind them
+ if (!ctx->checkBelAvail(entry.second.second)) {
+ success = false;
+ break;
+ }
+ ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = true;
+ return success;
+}
+
+bool DetailPlacerThreadState::check_validity()
+{
+#if !defined(__wasm)
+ std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ bool result = true;
+ for (auto e : moved_cells) {
+ if (!ctx->isBelLocationValid(e.second.first)) {
+ // Have to check old, too, as unbinding a bel could make a placement illegal by virtue of no longer
+ // enabling dedicated routes to be used
+ result = false;
+ break;
+ }
+ if (!ctx->isBelLocationValid(e.second.second)) {
+ result = false;
+ break;
+ }
+ }
+ return result;
+}
+
+void DetailPlacerThreadState::revert_move()
+{
+ if (arch_state_dirty) {
+ // If changes to the arch state were made, revert them by restoring original cell bindings
+#if !defined(__wasm)
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ for (auto &entry : moved_cells) {
+ BelId curr_bound = ctx->cells.at(entry.first)->bel;
+ if (curr_bound != BelId())
+ ctx->unbindBel(curr_bound);
+ }
+ for (auto &entry : moved_cells) {
+ ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = false;
+ }
+ for (auto &entry : moved_cells)
+ local_cell2bel[entry.first] = entry.second.first;
+}
+
+void DetailPlacerThreadState::commit_move()
+{
+ arch_state_dirty = false;
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ // Commit updated net bounds
+ net_bounds.at(bc) = new_net_bounds.at(bc);
+ }
+ }
+ if (g.base_cfg.timing_driven) {
+ NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size());
+ for (size_t i = 0; i < timing_changed_arcs.size(); i++) {
+ auto arc = timing_changed_arcs.at(i);
+ arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i);
+ }
+ }
+}
+
+void DetailPlacerThreadState::compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel)
+{
+ Loc new_loc = ctx->getBelLocation(new_bel);
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ for (const auto &port : cell->ports) {
+ NetInfo *pn = port.second.net;
+ if (!pn)
+ continue;
+ int idx = thread_net_idx.at(pn->udata);
+ if (ignored_nets.at(idx))
+ continue;
+ NetBB &new_bounds = new_net_bounds.at(idx);
+ // For the x-axis (i=0) and y-axis (i=1)
+ for (int i = 0; i < 2; i++) {
+ auto &axis = axes.at(i);
+ // New and old on this axis
+ int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x;
+ // References to updated bounding box entries
+ auto &b0 = i ? new_bounds.y0 : new_bounds.x0;
+ auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0;
+ auto &b1 = i ? new_bounds.y1 : new_bounds.x1;
+ auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1;
+ auto &change = axis.already_bounds_changed.at(idx);
+ // Lower bound
+ if (new_pos < b0) {
+ // Further out than current lower bound
+ b0 = new_pos;
+ n0 = 1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b0 && old_pos > b0) {
+ // Moved from inside into current bound
+ ++n0;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b0 && new_pos > b0) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n0 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n0;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ // Upper bound
+ if (new_pos > b1) {
+ // Further out than current upper bound
+ b1 = new_pos;
+ n1 = 1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b1 && old_pos < b1) {
+ // Moved onto current bound
+ ++n1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b1 && new_pos < b1) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n1 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n1;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ }
+ // Timing updates if timing driven
+ if (g.base_cfg.timing_driven && !tmg_ignored_nets.at(idx)) {
+ if (port.second.type == PORT_OUT) {
+ int cc;
+ TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
+ if (cls != TMG_IGNORE) {
+ for (auto usr : pn->users.enumerate())
+ if (!already_timing_changed.at(idx).at(usr.index.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index));
+ already_timing_changed.at(idx).at(usr.index.idx()) = true;
+ }
+ }
+ } else {
+ auto usr = port.second.user_idx;
+ if (!already_timing_changed.at(idx).at(usr.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr));
+ already_timing_changed.at(idx).at(usr.idx()) = true;
+ }
+ }
+ }
+ }
+}
+
+void DetailPlacerThreadState::compute_total_change()
+{
+ auto &xa = axes.at(0), &ya = axes.at(1);
+ for (auto &bc : xa.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : xa.bounds_changed_nets)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg));
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == NO_CHANGE)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg));
+ if (g.base_cfg.timing_driven) {
+ NPNR_ASSERT(new_timing_costs.empty());
+ for (auto arc : timing_changed_arcs) {
+ double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel);
+ timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx()));
+ new_timing_costs.push_back(new_cost);
+ }
+ }
+}
+
+void DetailPlacerThreadState::reset_move_state()
+{
+ moved_cells.clear();
+ cell_rel.clear();
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ new_net_bounds.at(bc) = net_bounds.at(bc);
+ axis.already_bounds_changed[bc] = NO_CHANGE;
+ }
+ axis.bounds_changed_nets.clear();
+ }
+ for (auto &arc : timing_changed_arcs) {
+ already_timing_changed.at(arc.first).at(arc.second.idx()) = false;
+ }
+ timing_changed_arcs.clear();
+ new_timing_costs.clear();
+ wirelen_delta = 0;
+ timing_delta = 0;
+}
+
+bool DetailPlacerThreadState::add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel)
+{
+ if (!bounds_check(old_bel) || !bounds_check(new_bel))
+ return false;
+ if (!ctx->isValidBelForCellType(cell->type, new_bel))
+ return false;
+ NPNR_ASSERT(!moved_cells.count(cell->name));
+ moved_cells[cell->name] = std::make_pair(old_bel, new_bel);
+ local_cell2bel[cell->name] = new_bel;
+ compute_changes_for_cell(cell, old_bel, new_bel);
+ return true;
+}
+
+NEXTPNR_NAMESPACE_END
diff --git a/common/place/detail_place_core.h b/common/place/detail_place_core.h
new file mode 100644
index 00000000..1c280f24
--- /dev/null
+++ b/common/place/detail_place_core.h
@@ -0,0 +1,224 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+This provides core data structures for a thread-safe detail placer that needs to swap cells and evaluate the cost
+changes of swaps.
+
+It works on a partition-based threading approach; although threading can be avoided by only instantiating one
+per-thread structure and calling its methods from the main thread.
+
+Each thread's data includes its own local net indexing for nets inside the partition (which can overlap thread
+boundaries); and its own local cell-to-bel mapping for any cells on those nets, so there are no races with moves
+made by other threads.
+
+A move is an atomic transaction of updated cell to bel mappings inside a thread. The first step is to reset the
+per-move structures; then to add all of the moved cells to the move with add_to_move.
+
+Evaluation of wirelength and timing changes of a move is done with compute_changes_for_cell and compute_total_change.
+
+bind_move will probationally bind the move using the arch API functions, acquiring a lock during this time to prevent
+races on non-thread-safe arch implementations, returning true if the bind succeeded or false if something went wrong
+and it should be aborted. check_validity must then be called to use the arch API validity check functions on the move.
+
+Finally if the move meets criteria and is accepted then commit_move marks it as committed, otherwise revert_move
+aborts the entire move transaction.
+*/
+
+#ifndef DETAIL_PLACE_CORE_H
+#define DETAIL_PLACE_CORE_H
+
+#include "nextpnr.h"
+
+#include "detail_place_cfg.h"
+#include "fast_bels.h"
+#include "timing.h"
+
+#include <queue>
+
+#if !defined(__wasm)
+#include <shared_mutex>
+#endif
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct PlacePartition
+{
+ int x0, y0, x1, y1;
+ std::vector<CellInfo *> cells;
+ PlacePartition() = default;
+ explicit PlacePartition(Context *ctx);
+ void split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r);
+};
+
+typedef int64_t wirelen_t;
+
+struct NetBB
+{
+ // Actual bounding box
+ int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
+ // Number of cells at each extremity
+ int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
+ inline wirelen_t hpwl(const DetailPlaceCfg &cfg) const
+ {
+ return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
+ }
+ static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr);
+};
+
+struct DetailPlacerState
+{
+ explicit DetailPlacerState(Context *ctx, DetailPlaceCfg &cfg)
+ : ctx(ctx), base_cfg(cfg), bels(ctx, false, 64), tmg(ctx){};
+ Context *ctx;
+ DetailPlaceCfg &base_cfg;
+ FastBels bels;
+ std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index
+ std::vector<NetBB> last_bounds;
+ std::vector<std::vector<double>> last_tmg_costs;
+ dict<IdString, NetBB> region_bounds;
+ TimingAnalyser tmg;
+
+ wirelen_t total_wirelen = 0;
+ double total_timing_cost = 0;
+
+#if !defined(__wasm)
+ std::shared_timed_mutex archapi_mutex;
+#endif
+
+ inline double get_timing_cost(const NetInfo *net, store_index<PortRef> user,
+ const dict<IdString, BelId> *cell2bel = nullptr)
+ {
+ if (!net->driver.cell)
+ return 0;
+ const auto &sink = net->users.at(user);
+ IdString driver_pin, sink_pin;
+ // Pick the first pin for a prediction; assume all will be similar enough
+ for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) {
+ driver_pin = pin;
+ break;
+ }
+ for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) {
+ sink_pin = pin;
+ break;
+ }
+ float crit = tmg.get_criticality(CellPortKey(sink));
+ BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel;
+ BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel;
+ double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin));
+ return delay * std::pow(crit, base_cfg.crit_exp);
+ }
+
+ inline bool skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ if (ctx->getBelGlobalBuf(net->driver.cell->bel))
+ return true;
+ return false;
+ }
+
+ inline bool timing_skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ int cc;
+ auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc);
+ if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK)
+ return true;
+ return false;
+ }
+
+ void update_global_costs();
+};
+
+struct DetailPlacerThreadState
+{
+ Context *ctx; // Nextpnr context pointer
+ DetailPlacerState &g; // Placer engine state
+ int idx; // Index of the thread
+ DeterministicRNG rng; // Local RNG
+ // The cell partition that the thread works on
+ PlacePartition p;
+ // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can
+ // optimise
+ std::vector<int> thread_net_idx;
+ // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's
+ // perspective
+ std::vector<NetInfo *> thread_nets;
+ std::vector<NetBB> net_bounds;
+ std::vector<std::vector<double>> arc_tmg_cost;
+ std::vector<bool> ignored_nets, tmg_ignored_nets;
+ bool arch_state_dirty = false;
+ // Our local cell-bel map; that won't be affected by out-of-partition moves
+ dict<IdString, BelId> local_cell2bel;
+
+ // Data on an inflight move
+ dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old, new)
+ // For cluster moves only
+ std::vector<std::pair<CellInfo *, Loc>> cell_rel;
+ // For incremental wirelength and delay updates
+ wirelen_t wirelen_delta = 0;
+ double timing_delta = 0;
+ // Wirelength changes are handled on a per-axis basis to reduce redundant bound recomputation
+ enum BoundChange
+ {
+ NO_CHANGE,
+ CELL_MOVED_INWARDS,
+ CELL_MOVED_OUTWARDS,
+ FULL_RECOMPUTE
+ };
+ struct AxisChanges
+ {
+ std::vector<int> bounds_changed_nets;
+ std::vector<BoundChange> already_bounds_changed;
+ };
+ std::array<AxisChanges, 2> axes;
+ std::vector<NetBB> new_net_bounds;
+
+ std::vector<std::vector<bool>> already_timing_changed;
+ std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs;
+ std::vector<double> new_timing_costs;
+
+ DetailPlacerThreadState(Context *ctx, DetailPlacerState &g, int idx) : ctx(ctx), g(g), idx(idx){};
+ void set_partition(const PlacePartition &part);
+ void setup_initial_state();
+ bool bounds_check(BelId bel);
+
+ // Reset the inflight move state
+ void reset_move_state();
+ // Add a cell change to the move
+ bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel);
+ // For an inflight move; attempt to actually apply the changes to the arch API
+ bool bind_move();
+ // Checks if the arch API bel validity for a move is accepted
+ bool check_validity();
+ // Undo any changes relating to an inflight move
+ void revert_move();
+ // Mark the inflight move as complete and update cost structures
+ void commit_move();
+ // Update the inflight cost change structures for a given cell moe
+ void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel);
+ // Update the total cost change for an inflight move
+ void compute_total_change();
+};
+
+NEXTPNR_NAMESPACE_END
+
+#endif
diff --git a/common/place/parallel_refine.cc b/common/place/parallel_refine.cc
index a868ca58..a2de5b13 100644
--- a/common/place/parallel_refine.cc
+++ b/common/place/parallel_refine.cc
@@ -22,9 +22,8 @@
#if !defined(__wasm)
-#include "fast_bels.h"
+#include "detail_place_core.h"
#include "scope_lock.h"
-#include "timing.h"
#include <chrono>
#include <mutex>
@@ -35,515 +34,24 @@
NEXTPNR_NAMESPACE_BEGIN
namespace {
-struct Partition
-{
- int x0, y0, x1, y1;
- std::vector<CellInfo *> cells;
- Partition() = default;
- explicit Partition(Context *ctx)
- {
- x0 = ctx->getGridDimX();
- y0 = ctx->getGridDimY();
- x1 = 0;
- y1 = 0;
- for (auto &cell : ctx->cells) {
- Loc l = ctx->getBelLocation(cell.second->bel);
- x0 = std::min(x0, l.x);
- x1 = std::max(x1, l.x);
- y0 = std::min(y0, l.y);
- y1 = std::max(y1, l.y);
- cells.push_back(cell.second.get());
- }
- }
- void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r)
- {
- std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) {
- Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel);
- return yaxis ? (l0.y < l1.y) : (l0.x < l1.x);
- });
- size_t pivot_point = size_t(cells.size() * pivot);
- l.cells.clear();
- r.cells.clear();
- l.cells.reserve(pivot_point);
- r.cells.reserve(cells.size() - pivot_point);
- int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1)
- : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y
- : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x);
- for (size_t i = 0; i < cells.size(); i++) {
- Loc loc = ctx->getBelLocation(cells.at(i)->bel);
- ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i));
- }
- if (yaxis) {
- l.x0 = r.x0 = x0;
- l.x1 = r.x1 = x1;
- l.y0 = y0;
- l.y1 = pivot_coord;
- r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1);
- r.y1 = y1;
- } else {
- l.y0 = r.y0 = y0;
- l.y1 = r.y1 = y1;
- l.x0 = x0;
- l.x1 = pivot_coord;
- r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1);
- r.x1 = x1;
- }
- }
-};
-typedef int64_t wirelen_t;
-
-struct NetBB
+struct GlobalState : DetailPlacerState
{
- // Actual bounding box
- int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
- // Number of cells at each extremity
- int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
- wirelen_t hpwl(const ParallelRefineCfg &cfg) const
- {
- return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
- }
- static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr)
- {
- NetBB result{};
- if (!net->driver.cell)
- return result;
- auto bel_loc = [&](const CellInfo *cell) {
- BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel;
- return ctx->getBelLocation(bel);
- };
- result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1;
- Loc drv_loc = bel_loc(net->driver.cell);
- result.x0 = result.x1 = drv_loc.x;
- result.y0 = result.y1 = drv_loc.y;
- for (auto &usr : net->users) {
- Loc l = bel_loc(usr.cell);
- if (l.x == result.x0)
- ++result.nx0; // on the edge
- else if (l.x < result.x0) {
- result.x0 = l.x; // extends the edge
- result.nx0 = 1;
- }
- if (l.x == result.x1)
- ++result.nx1; // on the edge
- else if (l.x > result.x1) {
- result.x1 = l.x; // extends the edge
- result.nx1 = 1;
- }
- if (l.y == result.y0)
- ++result.ny0; // on the edge
- else if (l.y < result.y0) {
- result.y0 = l.y; // extends the edge
- result.ny0 = 1;
- }
- if (l.y == result.y1)
- ++result.ny1; // on the edge
- else if (l.y > result.y1) {
- result.y1 = l.y; // extends the edge
- result.ny1 = 1;
- }
- }
- return result;
- }
-};
+ explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : DetailPlacerState(ctx, this->cfg), cfg(cfg){};
-struct GlobalState
-{
- explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){};
- Context *ctx;
ParallelRefineCfg cfg;
- FastBels bels;
- std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index
- std::vector<NetBB> last_bounds;
- std::vector<std::vector<double>> last_tmg_costs;
- dict<IdString, NetBB> region_bounds;
- TimingAnalyser tmg;
-
- std::shared_timed_mutex archapi_mutex;
-
double temperature = 1e-7;
int radius = 3;
-
- wirelen_t total_wirelen = 0;
- double total_timing_cost = 0;
-
- double get_timing_cost(const NetInfo *net, store_index<PortRef> user,
- const dict<IdString, BelId> *cell2bel = nullptr)
- {
- if (!net->driver.cell)
- return 0;
- const auto &sink = net->users.at(user);
- IdString driver_pin, sink_pin;
- // Pick the first pin for a prediction; assume all will be similar enouhg
- for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) {
- driver_pin = pin;
- break;
- }
- for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) {
- sink_pin = pin;
- break;
- }
- float crit = tmg.get_criticality(CellPortKey(sink));
- BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel;
- BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel;
- double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin));
- return delay * std::pow(crit, cfg.crit_exp);
- }
-
- bool skip_net(const NetInfo *net) const
- {
- if (!net->driver.cell)
- return true;
- if (ctx->getBelGlobalBuf(net->driver.cell->bel))
- return true;
- return false;
- }
- bool timing_skip_net(const NetInfo *net) const
- {
- if (!net->driver.cell)
- return true;
- int cc;
- auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc);
- if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK)
- return true;
- return false;
- }
// ....
};
-struct ThreadState
+struct ThreadState : DetailPlacerThreadState
{
- Context *ctx; // Nextpnr context pointer
- GlobalState &g; // Refinement engine state
- int idx; // Index of the thread
- DeterministicRNG rng; // Local RNG
- // The cell partition that the thread works on
- Partition p;
- // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can
- // optimise
- std::vector<int> thread_net_idx;
- // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's
- // perspective
- std::vector<NetInfo *> thread_nets;
- std::vector<NetBB> net_bounds;
- std::vector<std::vector<double>> arc_tmg_cost;
- std::vector<bool> ignored_nets, tmg_ignored_nets;
- bool arch_state_dirty = false;
- // Our local cell-bel map; that won't be affected by out-of-partition moves
- dict<IdString, BelId> local_cell2bel;
-
- // Data on an inflight move
- dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new)
- // For cluster moves only
- std::vector<std::pair<CellInfo *, Loc>> cell_rel;
- // For incremental wirelength and delay updates
- wirelen_t wirelen_delta = 0;
- double timing_delta = 0;
-
+ ThreadState(Context *ctx, GlobalState &g, int idx) : DetailPlacerThreadState(ctx, g, idx), g(g){};
// Total made and accepted moved
+ GlobalState &g;
int n_move = 0, n_accept = 0;
- enum BoundChange
- {
- NO_CHANGE,
- CELL_MOVED_INWARDS,
- CELL_MOVED_OUTWARDS,
- FULL_RECOMPUTE
- };
- // Wirelen related are handled on a per-axis basis to reduce
- struct AxisChanges
- {
- std::vector<int> bounds_changed_nets;
- std::vector<BoundChange> already_bounds_changed;
- };
- std::array<AxisChanges, 2> axes;
- std::vector<NetBB> new_net_bounds;
-
- std::vector<std::vector<bool>> already_timing_changed;
- std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs;
- std::vector<double> new_timing_costs;
-
- ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){};
- void set_partition(const Partition &part)
- {
- p = part;
- thread_nets.clear();
- thread_net_idx.resize(g.flat_nets.size());
- std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1);
- // Determine the set of nets that are within the thread; and therefore we care about
- for (auto thread_cell : part.cells) {
- for (auto &port : thread_cell->ports) {
- if (!port.second.net)
- continue;
- int global_idx = port.second.net->udata;
- auto &thread_idx = thread_net_idx.at(global_idx);
- // Already added to the set
- if (thread_idx != -1)
- continue;
- thread_idx = thread_nets.size();
- thread_nets.push_back(port.second.net);
- }
- }
- tmg_ignored_nets.clear();
- ignored_nets.clear();
- for (auto tn : thread_nets) {
- ignored_nets.push_back(g.skip_net(tn));
- tmg_ignored_nets.push_back(g.timing_skip_net(tn));
- }
- // Set up the original cell-bel map for all nets inside the thread
- local_cell2bel.clear();
- for (NetInfo *net : thread_nets) {
- if (net->driver.cell)
- local_cell2bel[net->driver.cell->name] = net->driver.cell->bel;
- for (auto &usr : net->users)
- local_cell2bel[usr.cell->name] = usr.cell->bel;
- }
- }
- void setup_initial_state()
- {
- // Setup initial net bounding boxes and timing costs
- net_bounds.clear();
- arc_tmg_cost.clear();
- for (auto tn : thread_nets) {
- net_bounds.push_back(g.last_bounds.at(tn->udata));
- arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata));
- }
- new_net_bounds = net_bounds;
- for (int j = 0; j < 2; j++) {
- auto &a = axes.at(j);
- a.already_bounds_changed.resize(net_bounds.size());
- }
- already_timing_changed.clear();
- already_timing_changed.resize(net_bounds.size());
- for (size_t i = 0; i < thread_nets.size(); i++)
- already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity());
- }
- bool bounds_check(BelId bel)
- {
- Loc l = ctx->getBelLocation(bel);
- if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1)
- return false;
- return true;
- }
- bool bind_move()
- {
- std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- for (auto &entry : moved_cells) {
- ctx->unbindBel(entry.second.first);
- }
- bool success = true;
- for (auto &entry : moved_cells) {
- // Make sure targets are available before we bind them
- if (!ctx->checkBelAvail(entry.second.second)) {
- success = false;
- break;
- }
- ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
- }
- arch_state_dirty = true;
- return success;
- }
- bool check_validity()
- {
- std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- bool result = true;
- for (auto e : moved_cells) {
- if (!ctx->isBelLocationValid(e.second.first)) {
- // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer
- // enabling dedicated routes to be used
- result = false;
- break;
- }
- if (!ctx->isBelLocationValid(e.second.second)) {
- result = false;
- break;
- }
- }
- return result;
- }
- void revert_move()
- {
- if (arch_state_dirty) {
- // If changes to the arch state were made, revert them by restoring original cell bindings
- std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- for (auto &entry : moved_cells) {
- BelId curr_bound = ctx->cells.at(entry.first)->bel;
- if (curr_bound != BelId())
- ctx->unbindBel(curr_bound);
- }
- for (auto &entry : moved_cells) {
- ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
- }
- arch_state_dirty = false;
- }
- for (auto &entry : moved_cells)
- local_cell2bel[entry.first] = entry.second.first;
- }
- void commit_move()
- {
- arch_state_dirty = false;
- for (auto &axis : axes) {
- for (auto bc : axis.bounds_changed_nets) {
- // Commit updated net bounds
- net_bounds.at(bc) = new_net_bounds.at(bc);
- }
- }
- if (g.cfg.timing_driven) {
- NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size());
- for (size_t i = 0; i < timing_changed_arcs.size(); i++) {
- auto arc = timing_changed_arcs.at(i);
- arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i);
- }
- }
- }
- void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel)
- {
- Loc new_loc = ctx->getBelLocation(new_bel);
- Loc old_loc = ctx->getBelLocation(old_bel);
- for (const auto &port : cell->ports) {
- NetInfo *pn = port.second.net;
- if (!pn)
- continue;
- int idx = thread_net_idx.at(pn->udata);
- if (ignored_nets.at(idx))
- continue;
- NetBB &new_bounds = new_net_bounds.at(idx);
- // For the x-axis (i=0) and y-axis (i=1)
- for (int i = 0; i < 2; i++) {
- auto &axis = axes.at(i);
- // New and old on this axis
- int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x;
- // References to updated bounding box entries
- auto &b0 = i ? new_bounds.y0 : new_bounds.x0;
- auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0;
- auto &b1 = i ? new_bounds.y1 : new_bounds.x1;
- auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1;
- auto &change = axis.already_bounds_changed.at(idx);
- // Lower bound
- if (new_pos < b0) {
- // Further out than current lower bound
- b0 = new_pos;
- n0 = 1;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (new_pos == b0 && old_pos > b0) {
- // Moved from inside into current bound
- ++n0;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (old_pos == b0 && new_pos > b0) {
- // Moved from current bound to inside
- if (change == NO_CHANGE)
- axis.bounds_changed_nets.push_back(idx);
- if (n0 == 1) {
- // Was the last cell on the bound; have to do a full recompute
- change = FULL_RECOMPUTE;
- } else {
- --n0;
- if (change == NO_CHANGE)
- change = CELL_MOVED_INWARDS;
- }
- }
- // Upper bound
- if (new_pos > b1) {
- // Further out than current upper bound
- b1 = new_pos;
- n1 = new_pos;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (new_pos == b1 && old_pos < b1) {
- // Moved onto current bound
- ++n1;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (old_pos == b1 && new_pos < b1) {
- // Moved from current bound to inside
- if (change == NO_CHANGE)
- axis.bounds_changed_nets.push_back(idx);
- if (n1 == 1) {
- // Was the last cell on the bound; have to do a full recompute
- change = FULL_RECOMPUTE;
- } else {
- --n1;
- if (change == NO_CHANGE)
- change = CELL_MOVED_INWARDS;
- }
- }
- }
- // Timing updates if timing driven
- if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) {
- if (port.second.type == PORT_OUT) {
- int cc;
- TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
- if (cls != TMG_IGNORE) {
- for (auto usr : pn->users.enumerate())
- if (!already_timing_changed.at(idx).at(usr.index.idx())) {
- timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index));
- already_timing_changed.at(idx).at(usr.index.idx()) = true;
- }
- }
- } else {
- auto usr = port.second.user_idx;
- if (!already_timing_changed.at(idx).at(usr.idx())) {
- timing_changed_arcs.emplace_back(std::make_pair(idx, usr));
- already_timing_changed.at(idx).at(usr.idx()) = true;
- }
- }
- }
- }
- }
- void compute_total_change()
- {
- auto &xa = axes.at(0), &ya = axes.at(1);
- for (auto &bc : xa.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
- new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
- for (auto &bc : ya.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE &&
- ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
- new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
- for (auto &bc : xa.bounds_changed_nets)
- wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
- for (auto &bc : ya.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) == NO_CHANGE)
- wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
- if (g.cfg.timing_driven) {
- NPNR_ASSERT(new_timing_costs.empty());
- for (auto arc : timing_changed_arcs) {
- double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel);
- timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx()));
- new_timing_costs.push_back(new_cost);
- }
- }
- }
- void reset_move_state()
- {
- moved_cells.clear();
- cell_rel.clear();
- for (auto &axis : axes) {
- for (auto bc : axis.bounds_changed_nets) {
- new_net_bounds.at(bc) = net_bounds.at(bc);
- axis.already_bounds_changed[bc] = NO_CHANGE;
- }
- axis.bounds_changed_nets.clear();
- }
- for (auto &arc : timing_changed_arcs) {
- already_timing_changed.at(arc.first).at(arc.second.idx()) = false;
- }
- timing_changed_arcs.clear();
- new_timing_costs.clear();
- wirelen_delta = 0;
- timing_delta = 0;
- }
-
bool accept_move()
{
static constexpr double epsilon = 1e-20;
@@ -553,19 +61,6 @@ struct ThreadState
(g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature));
}
- bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel)
- {
- if (!bounds_check(old_bel) || !bounds_check(new_bel))
- return false;
- if (!ctx->isValidBelForCellType(cell->type, new_bel))
- return false;
- NPNR_ASSERT(!moved_cells.count(cell->name));
- moved_cells[cell->name] = std::make_pair(old_bel, new_bel);
- local_cell2bel[cell->name] = new_bel;
- compute_changes_for_cell(cell, old_bel, new_bel);
- return true;
- }
-
bool single_cell_swap(CellInfo *cell, BelId new_bel)
{
NPNR_ASSERT(moved_cells.empty());
@@ -806,14 +301,14 @@ struct ParallelRefine
g.bels.addCellType(cell_type);
}
};
- std::vector<Partition> parts;
+ std::vector<PlacePartition> parts;
void do_partition()
{
parts.clear();
parts.emplace_back(ctx);
bool yaxis = false;
while (parts.size() < t.size()) {
- std::vector<Partition> next(parts.size() * 2);
+ std::vector<PlacePartition> next(parts.size() * 2);
for (size_t i = 0; i < parts.size(); i++) {
// Randomly permute pivot every iteration so we get different thread boundaries
const float delta = 0.1;
@@ -834,28 +329,6 @@ struct ParallelRefine
w.join();
}
- void update_global_costs()
- {
- g.last_bounds.resize(g.flat_nets.size());
- g.last_tmg_costs.resize(g.flat_nets.size());
- g.total_wirelen = 0;
- g.total_timing_cost = 0;
- for (size_t i = 0; i < g.flat_nets.size(); i++) {
- NetInfo *ni = g.flat_nets.at(i);
- if (g.skip_net(ni))
- continue;
- g.last_bounds.at(i) = NetBB::compute(ctx, ni);
- g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg);
- if (!g.timing_skip_net(ni)) {
- auto &tc = g.last_tmg_costs.at(i);
- tc.resize(ni->users.capacity());
- for (auto usr : ni->users.enumerate()) {
- tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index);
- g.total_timing_cost += tc.at(usr.index.idx());
- }
- }
- }
- }
void run()
{
@@ -868,7 +341,7 @@ struct ParallelRefine
log_info("Running parallel refinement with %d threads.\n", int(t.size()));
int iter = 1;
bool done = false;
- update_global_costs();
+ g.update_global_costs();
double avg_wirelen = g.total_wirelen;
wirelen_t min_wirelen = g.total_wirelen;
while (true) {
@@ -914,7 +387,7 @@ struct ParallelRefine
for (auto &w : workers)
w.join();
g.tmg.run();
- update_global_costs();
+ g.update_global_costs();
iter++;
ctx->yield();
}
@@ -924,17 +397,14 @@ struct ParallelRefine
};
} // namespace
-ParallelRefineCfg::ParallelRefineCfg(Context *ctx)
+ParallelRefineCfg::ParallelRefineCfg(Context *ctx) : DetailPlaceCfg(ctx)
{
- timing_driven = ctx->setting<bool>("timing_driven");
threads = ctx->setting<int>("threads", 8);
// snap to nearest power of two; and minimum thread size
int actual_threads = 1;
while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size)
actual_threads *= 2;
threads = actual_threads;
- hpwl_scale_x = 1;
- hpwl_scale_y = 1;
}
bool parallel_refine(Context *ctx, ParallelRefineCfg cfg)
diff --git a/common/place/parallel_refine.h b/common/place/parallel_refine.h
index 556317cd..a6c8b469 100644
--- a/common/place/parallel_refine.h
+++ b/common/place/parallel_refine.h
@@ -20,18 +20,16 @@
#ifndef PARALLEL_REFINE_H
#define PARALLEL_REFINE_H
+#include "detail_place_cfg.h"
#include "nextpnr.h"
NEXTPNR_NAMESPACE_BEGIN
-struct ParallelRefineCfg
+struct ParallelRefineCfg : DetailPlaceCfg
{
ParallelRefineCfg(Context *ctx);
- bool timing_driven;
int threads;
- int hpwl_scale_x, hpwl_scale_y;
double lambda = 0.5f;
- float crit_exp = 8;
int inner_iters = 15;
int min_thread_size = 500;
};
n2528'>2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783