/* * nextpnr -- Next Generation Place and Route * * Copyright (C) 2019 David Shah * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * [[cite]] HeAP * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf * * [[cite]] SimPL * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf * * Notable changes from the original algorithm * - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is * added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation) * as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by * spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices. * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) */ #ifdef WITH_HEAP #include "placer_heap.h" #include #include #include #include #include #include #include #include #include #include #include "fast_bels.h" #include "log.h" #include "nextpnr.h" #include "place_common.h" #include "placer1.h" #include "scope_lock.h" #include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN namespace { // A simple internal representation for a sparse system of equations Ax = rhs // This is designed to decouple the functions that build the matrix to the engine that // solves it, and the representation that requires template struct EquationSystem { EquationSystem(size_t rows, size_t cols) { A.resize(cols); rhs.resize(rows); } // Simple sparse format, easy to convert to CCS for solver std::vector>> A; // col -> (row, x[row, col]) sorted by row std::vector rhs; // RHS vector void reset() { for (auto &col : A) col.clear(); std::fill(rhs.begin(), rhs.end(), T()); } void add_coeff(int row, int col, T val) { auto &Ac = A.at(col); // Binary search int b = 0, e = int(Ac.size()) - 1; while (b <= e) { int i = (b + e) / 2; if (Ac.at(i).first == row) { Ac.at(i).second += val; return; } if (Ac.at(i).first > row) e = i - 1; else b = i + 1; } Ac.insert(Ac.begin() + b, std::make_pair(row, val)); } void add_rhs(int row, T val) { rhs[row] += val; } void solve(std::vector &x, float tolerance) { using namespace Eigen; if (x.empty()) return; NPNR_ASSERT(x.size() == A.size()); VectorXd vx(x.size()), vb(rhs.size()); SparseMatrix mat(A.size(), A.size()); std::vector colnnz; for (auto &Ac : A) colnnz.push_back(int(Ac.size())); mat.reserve(colnnz); for (int col = 0; col < int(A.size()); col++) { auto &Ac = A.at(col); for (auto &el : Ac) mat.insert(el.first, col) = el.second; } for (int i = 0; i < int(x.size()); i++) vx[i] = x.at(i); for (int i = 0; i < int(rhs.size()); i++) vb[i] = rhs.at(i); ConjugateGradient, Lower | Upper> solver; solver.setTolerance(tolerance); VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx); for (int i = 0; i < int(x.size()); i++) x.at(i) = xr[i]; // for (int i = 0; i < int(x.size()); i++) // log_info("x[%d] = %f\n", i, x.at(i)); } }; } // namespace class HeAPPlacer { public: HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx) { Eigen::initParallel(); tmg.setup_only = true; tmg.setup(); } bool place() { auto startt = std::chrono::high_resolution_clock::now(); nextpnr::ScopeLock lock(ctx); place_constraints(); build_fast_bels(); seed_placement(); update_all_chains(); wirelen_t hpwl = total_hpwl(); log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n", int(place_cells.size()), int(hpwl)); for (int i = 0; i < 4; i++) { setup_solve_cells(); auto solve_startt = std::chrono::high_resolution_clock::now(); #ifdef NPNR_DISABLE_THREADS build_solve_direction(false, -1); build_solve_direction(true, -1); #else boost::thread xaxis([&]() { build_solve_direction(false, -1); }); build_solve_direction(true, -1); xaxis.join(); #endif auto solve_endt = std::chrono::high_resolution_clock::now(); solve_time += std::chrono::duration(solve_endt - solve_startt).count(); update_all_chains(); hpwl = total_hpwl(); log_info(" at initial placer iter %d, wirelen = %d\n", i, int(hpwl)); } wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; std::vector> solution; std::vector> heap_runs; std::unordered_set all_buckets; std::unordered_map bucket_count; for (auto cell : place_cells) { BelBucketId bucket = ctx->getBelBucketForCellType(cell->type); if (!all_buckets.count(bucket)) { heap_runs.push_back(std::unordered_set{bucket}); all_buckets.insert(bucket); } bucket_count[bucket]++; } // If more than 98% of cells are one cell type, always solve all at once // Otherwise, follow full HeAP strategy of rotate&all for (auto &c : bucket_count) { if (c.second >= 0.98 * int(place_cells.size())) { heap_runs.clear(); break; } } if (cfg.placeAllAtOnce) { // Never want to deal with LUTs, FFs, MUXFxs separately, // for now disable all single-cell-type runs and only have heterogeneous // runs heap_runs.clear(); } heap_runs.push_back(all_buckets); // The main HeAP placer loop log_info("Running main analytical placer.\n"); while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) { // Alternate between particular bel types and all bels for (auto &run : heap_runs) { auto run_startt = std::chrono::high_resolution_clock::now(); setup_solve_cells(&run); if (solve_cells.empty()) continue; // Heuristic: don't bother with threading below a certain size auto solve_startt = std::chrono::high_resolution_clock::now(); // Build the connectivity matrix and run the solver; multithreaded between x and y axes if applicable #ifndef NPNR_DISABLE_THREADS if (solve_cells.size() >= 500) { boost::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); }); build_solve_direction(true, (iter == 0) ? -1 : iter); xaxis.join(); } else #endif { build_solve_direction(false, (iter == 0) ? -1 : iter); build_solve_direction(true, (iter == 0) ? -1 : iter); } auto solve_endt = std::chrono::high_resolution_clock::now(); solve_time += std::chrono::duration(solve_endt - solve_startt).count(); update_all_chains(); solved_hpwl = total_hpwl(); update_all_chains(); // Run the spreader for (const auto &group : cfg.cellGroups) CutSpreader(this, group).run(); for (auto type : sorted(run)) if (std::all_of(cfg.cellGroups.begin(), cfg.cellGroups.end(), [type](const std::unordered_set &grp) { return !grp.count(type); })) CutSpreader(this, {type}).run(); // Run strict legalisation to find a valid bel for all cells update_all_chains(); spread_hpwl = total_hpwl(); legalise_placement_strict(true); update_all_chains(); legal_hpwl = total_hpwl(); auto run_stopt = std::chrono::high_resolution_clock::now(); IdString bucket_name = ctx->getBelBucketName(*run.begin()); log_info(" at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n", iter + 1, (run.size() > 1 ? "ALL" : bucket_name.c_str(ctx)), int(solved_hpwl), int(spread_hpwl), int(legal_hpwl), std::chrono::duration(run_stopt - run_startt).count()); } // Update timing weights if (cfg.timing_driven) tmg.run(); if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; stalled = 0; // Save solution solution.clear(); for (auto cell : sorted(ctx->cells)) { solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength); } } else { ++stalled; } for (auto &cl : cell_locs) { cl.second.legal_x = cl.second.x; cl.second.legal_y = cl.second.y; } ctx->yield(); ++iter; } // Apply saved solution for (auto &sc : solution) { CellInfo *cell = std::get<0>(sc); if (cell->bel != BelId()) ctx->unbindBel(cell->bel); } for (auto &sc : solution) { CellInfo *cell; BelId bel; PlaceStrength strength; std::tie(cell, bel, strength) = sc; ctx->bindBel(bel, cell, strength); } for (auto cell : sorted(ctx->cells)) { if (cell.second->bel == BelId()) log_error("Found unbound cell %s\n", cell.first.c_str(ctx)); if (ctx->getBoundBelCell(cell.second->bel) != cell.second) log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx)); if (ctx->debug) log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->nameOfBel(cell.second->bel)); } bool any_bad_placements = false; for (auto bel : ctx->getBels()) { CellInfo *cell = ctx->getBoundBelCell(bel); if (!ctx->isBelLocationValid(bel)) { std::string cell_text = "no cell"; if (cell != nullptr) cell_text = std::string("cell '") + ctx->nameOf(cell) + "'"; log_warning("post-placement validity check failed for Bel '%s' " "(%s)\n", ctx->nameOfBel(bel), cell_text.c_str()); any_bad_placements = true; } } if (any_bad_placements) { return false; } auto endtt = std::chrono::high_resolution_clock::now(); log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); log_info(" of which solving equations: %.02fs\n", solve_time); log_info(" of which spreading cells: %.02fs\n", cl_time); log_info(" of which strict legalisation: %.02fs\n", sl_time); ctx->check(); lock.unlock_early(); if (!placer1_refine(ctx, Placer1Cfg(ctx))) { return false; } return true; } private: Context *ctx; PlacerHeapCfg cfg; int max_x = 0, max_y = 0; FastBels fast_bels; std::unordered_map> bel_types; TimingAnalyser tmg; struct BoundingBox { // Actual bounding box int x0 = 0, x1 = 0, y0 = 0, y1 = 0; }; std::unordered_map constraint_region_bounds; // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom // structure instead struct CellLocation { int x, y; int legal_x, legal_y; double rawx, rawy; bool locked, global; }; std::unordered_map cell_locs; // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains // (only the root of each macro is placed.) std::vector place_cells; // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place // cells of a certain type) std::vector solve_cells; // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent // where chains are within chains std::unordered_map chain_root; std::unordered_map chain_size; // The offset from chain_root to a cell in the chain std::unordered_map> cell_offsets; // Performance counting double solve_time = 0, cl_time = 0, sl_time = 0; // Place cells with the BEL attribute set to constrain them void place_constraints() { size_t placed_cells = 0; // Initial constraints placer for (auto &cell_entry : ctx->cells) { CellInfo *cell = cell_entry.second.get(); auto loc = cell->attrs.find(ctx->id("BEL")); if (loc != cell->attrs.end()) { std::string loc_name = loc->second.as_string(); BelId bel = ctx->getBelByNameStr(loc_name); if (bel == BelId()) { log_error("No Bel named \'%s\' located for " "this chip (processing BEL attribute on \'%s\')\n", loc_name.c_str(), cell->name.c_str(ctx)); } if (!ctx->isValidBelForCellType(cell->type, bel)) { IdString bel_type = ctx->getBelType(bel); log_error("Bel \'%s\' of type \'%s\' does not match cell " "\'%s\' of type \'%s\'\n", loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); } auto bound_cell = ctx->getBoundBelCell(bel); if (bound_cell) { log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); } ctx->bindBel(bel, cell, STRENGTH_USER); if (!ctx->isBelLocationValid(bel)) { IdString bel_type = ctx->getBelType(bel); log_error("Bel \'%s\' of type \'%s\' is not valid for cell " "\'%s\' of type \'%s\'\n", loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); } placed_cells++; } } log_info("Placed %d cells based on constraints.\n", int(placed_cells)); ctx->yield(); } void build_fast_bels() { for (auto bel : ctx->getBels()) { if (!ctx->checkBelAvail(bel)) continue; Loc loc = ctx->getBelLocation(bel); max_x = std::max(max_x, loc.x); max_y = std::max(max_y, loc.y); } std::unordered_set cell_types_in_use; std::unordered_set buckets_in_use; for (auto cell : sorted(ctx->cells)) { IdString cell_type = cell.second->type; cell_types_in_use.insert(cell_type); BelBucketId bucket = ctx->getBelBucketForCellType(cell_type); buckets_in_use.insert(bucket); } for (auto cell_type : cell_types_in_use) { fast_bels.addCellType(cell_type); } for (auto bucket : buckets_in_use) { fast_bels.addBelBucket(bucket); } // Determine bounding boxes of region constraints for (auto ®ion : sorted(ctx->region)) { Region *r = region.second; BoundingBox bb; if (r->constr_bels) { bb.x0 = std::numeric_limits::max(); bb.x1 = std::numeric_limits::min(); bb.y0 = std::numeric_limits::max(); bb.y1 = std::numeric_limits::min(); for (auto bel : r->bels) { Loc loc = ctx->getBelLocation(bel); bb.x0 = std::min(bb.x0, loc.x); bb.x1 = std::max(bb.x1, loc.x); bb.y0 = std::min(bb.y0, loc.y); bb.y1 = std::max(bb.y1, loc.y); } } else { bb.x0 = 0; bb.y0 = 0; bb.x1 = max_x; bb.y1 = max_y; } constraint_region_bounds[r->name] = bb; } } // Build and solve in one direction void build_solve_direction(bool yaxis, int iter) { for (int i = 0; i < 5; i++) { EquationSystem esx(solve_cells.size(), solve_cells.size()); build_equations(esx, yaxis, iter); solve_equations(esx, yaxis); } } // Check if a cell has any meaningful connectivity bool has_connectivity(CellInfo *cell) { for (auto port : cell->ports) { if (port.second.net != nullptr && port.second.net->driver.cell != nullptr && !port.second.net->users.empty()) return true; } return false; } // Build up a random initial placement, without regard to legality // FIXME: Are there better approaches to the initial placement (e.g. greedy?) void seed_placement() { std::unordered_set cell_types; for (const auto &cell : ctx->cells) { cell_types.insert(cell.second->type); } std::unordered_set bels_used; std::unordered_map> available_bels; for (auto bel : ctx->getBels()) { if (!ctx->checkBelAvail(bel)) { continue; } for (auto cell_type : cell_types) { if (ctx->isValidBelForCellType(cell_type, bel)) { available_bels[cell_type].push_back(bel); } } } for (auto &t : available_bels) { ctx->shuffle(t.second.begin(), t.second.end()); } for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; if (ci->bel != BelId()) { Loc loc = ctx->getBelLocation(ci->bel); cell_locs[cell.first].x = loc.x; cell_locs[cell.first].y = loc.y; cell_locs[cell.first].locked = true; cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); } else if (ci->constr_parent == nullptr) { bool placed = false; int attempt_count = 0; while (!placed) { ++attempt_count; if (attempt_count > 25000) { log_error("Unable to find a placement location for cell '%s'\n", ci->name.c_str(ctx)); } // Make sure this cell type is in the available BEL map at // all. if (!available_bels.count(ci->type)) { log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n", ci->name.c_str(ctx), ci->type.c_str(ctx)); } // Find an unused BEL from bels_for_cell_type. auto &bels_for_cell_type = available_bels.at(ci->type); BelId bel; while (true) { if (bels_for_cell_type.empty()) { log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n", ci->name.c_str(ctx), ci->type.c_str(ctx)); } BelId candidate_bel = bels_for_cell_type.back(); bels_for_cell_type.pop_back(); if (bels_used.count(candidate_bel)) { // candidate_bel has already been used by another // cell type, skip it. continue; } bel = candidate_bel; break; } Loc loc = ctx->getBelLocation(bel); cell_locs[cell.first].x = loc.x; cell_locs[cell.first].y = loc.y; cell_locs[cell.first].locked = false; cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); // FIXME if (has_connectivity(cell.second) && !cfg.ioBufTypes.count(ci->type)) { bels_used.insert(bel); place_cells.push_back(ci); placed = true; } else { ctx->bindBel(bel, ci, STRENGTH_STRONG); if (ctx->isBelLocationValid(bel)) { cell_locs[cell.first].locked = true; placed = true; bels_used.insert(bel); } else { ctx->unbindBel(bel); available_bels.at(ci->type).push_front(bel); } } } } } } // Setup the cells to be solved, returns the number of rows int setup_solve_cells(std::unordered_set *buckets = nullptr) { int row = 0; solve_cells.clear(); // First clear the udata of all cells for (auto cell : sorted(ctx->cells)) cell.second->udata = dont_solve; // Then update cells to be placed, which excludes cell children for (auto cell : place_cells) { if (buckets && !buckets->count(ctx->getBelBucketForCellType(cell->type))) continue; cell->udata = row++; solve_cells.push_back(cell); } // Finally, update the udata of children for (auto chained : chain_root) ctx->cells.at(chained.first)->udata = chained.second->udata; return row; } // Update the location of all children of a chain void update_chain(CellInfo *cell, CellInfo *root) { const auto &base = cell_locs[cell->name]; for (auto child : cell->constr_children) { // FIXME: Improve handling of heterogeneous chains if (child->type == root->type) chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) cell_locs[child->name].x = std::max(0, std::min(max_x, base.x + child->constr_x)); else cell_locs[child->name].x = base.x; // better handling of UNCONSTR? if (child->constr_y != child->UNCONSTR) cell_locs[child->name].y = std::max(0, std::min(max_y, base.y + child->constr_y)); else cell_locs[child->name].y = base.y; // better handling of UNCONSTR? chain_root[child->name] = root; if (!child->constr_children.empty()) update_chain(child, root); } } // Update all chains void update_all_chains() { for (auto cell : place_cells) { chain_size[cell->name] = 1; if (!cell->constr_children.empty()) update_chain(cell, cell); } } // Run a function on all ports of a net - including the driver and all users template void foreach_port(NetInfo *net, Tf func) { if (net->driver.cell != nullptr) func(net->driver, -1); for (size_t i = 0; i < net->users.size(); i++) func(net->users.at(i), i); } // Build the system of equations for either X or Y void build_equations(EquationSystem &es, bool yaxis, int iter = -1) { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; auto legal_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; }; es.reset(); for (auto net : sorted(ctx->nets)) { NetInfo *ni = net.second; if (ni->driver.cell == nullptr) continue; if (ni->users.empty()) continue; if (cell_locs.at(ni->driver.cell->name).global) continue; // Find the bounds of the net in this axis, and the ports that correspond to these bounds PortRef *lbport = nullptr, *ubport = nullptr; int lbpos = std::numeric_limits::max(), ubpos = std::numeric_limits::min(); foreach_port(ni, [&](PortRef &port, int user_idx) { int pos = cell_pos(port.cell); if (pos < lbpos) { lbpos = pos; lbport = &port; } if (pos > ubpos) { ubpos = pos; ubport = &port; } }); NPNR_ASSERT(lbport != nullptr); NPNR_ASSERT(ubport != nullptr); auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) { if (eqn.cell->udata == dont_solve) return; int row = eqn.cell->udata; int v_pos = cell_pos(var.cell); if (var.cell->udata != dont_solve) { es.add_coeff(row, var.cell->udata, weight); } else { es.add_rhs(row, -v_pos * weight); } if (cell_offsets.count(var.cell->name)) { es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second : cell_offsets.at(var.cell->name).first) * weight); } }; // Add all relevant connections to the matrix foreach_port(ni, [&](PortRef &port, int user_idx) { int this_pos = cell_pos(port.cell); auto process_arc = [&](PortRef *other) { if (other == &port) return; int o_pos = cell_pos(other->cell); double weight = 1.0 / (ni->users.size() * std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(o_pos - this_pos))); if (user_idx != -1) { weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)), cfg.criticalityExponent)); } // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, // if the other end isn't fixed stamp_equation(port, port, weight); stamp_equation(port, *other, -weight); stamp_equation(*other, *other, weight); stamp_equation(*other, port, -weight); }; process_arc(lbport); process_arc(ubport); }); } if (iter != -1) { float alpha = cfg.alpha; for (size_t row = 0; row < solve_cells.size(); row++) { int l_pos = legal_pos(solve_cells.at(row)); int c_pos = cell_pos(solve_cells.at(row)); double weight = alpha * iter / std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(l_pos - c_pos)); // Add an arc from legalised to current position es.add_coeff(row, row, weight); es.add_rhs(row, weight * l_pos); } } } // Build the system of equations for either X or Y void solve_equations(EquationSystem &es, bool yaxis) { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); es.solve(vals, cfg.solverTolerance); for (size_t i = 0; i < vals.size(); i++) if (yaxis) { cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i)))); if (solve_cells.at(i)->region != nullptr) cell_locs.at(solve_cells.at(i)->name).y = limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).y, true); } else { cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); if (solve_cells.at(i)->region != nullptr) cell_locs.at(solve_cells.at(i)->name).x = limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).x, false); } } // Compute HPWL wirelen_t total_hpwl() { wirelen_t hpwl = 0; for (auto net : sorted(ctx->nets)) { NetInfo *ni = net.second; if (ni->driver.cell == nullptr) continue; CellLocation &drvloc = cell_locs.at(ni->driver.cell->name); if (drvloc.global) continue; int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y; for (auto &user : ni->users) { CellLocation &usrloc = cell_locs.at(user.cell->name); xmin = std::min(xmin, usrloc.x); xmax = std::max(xmax, usrloc.x); ymin = std::min(ymin, usrloc.y); ymax = std::max(ymax, usrloc.y); } hpwl += cfg.hpwl_scale_x * (xmax - xmin) + cfg.hpwl_scale_y * (ymax - ymin); } return hpwl; } // Strict placement legalisation, performed after the initial HeAP spreading void legalise_placement_strict(bool require_validity = false) { auto startt = std::chrono::high_resolution_clock::now(); // Unbind all cells placed in this solution for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; if (ci->bel != BelId() && (ci->udata != dont_solve || (chain_root.count(ci->name) && chain_root.at(ci->name)->udata != dont_solve))) ctx->unbindBel(ci->bel); } // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using // the simple greedy largest-macro-first approach. std::priority_queue> remaining; for (auto cell : solve_cells) { remaining.emplace(chain_size[cell->name], cell->name); } int ripup_radius = 2; int total_iters = 0; int total_iters_noreset = 0; while (!remaining.empty()) { auto top = remaining.top(); remaining.pop(); CellInfo *ci = ctx->cells.at(top.second).get(); // Was now placed, ignore if (ci->bel != BelId()) continue; // log_info(" Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx)); FastBels::FastBelsData *fb; fast_bels.getBelsForCellType(ci->type, &fb); int radius = 0; int iter = 0; int iter_at_radius = 0; bool placed = false; BelId bestBel; int best_inp_len = std::numeric_limits::max(); total_iters++; total_iters_noreset++; if (total_iters > int(solve_cells.size())) { total_iters = 0; ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2); } if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) { log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n"); } while (!placed) { // Set a conservative timeout if (iter > std::max(10000, 3 * int(ctx->cells.size()))) log_error("Unable to find legal placement for cell '%s', check constraints and utilisation.\n", ctx->nameOf(ci)); // Determine a search radius around the solver location (which increases over time) that is clamped to // the region constraint for the cell (if applicable) int rx = radius, ry = radius; if (ci->region != nullptr) { rx = std::min(radius, (constraint_region_bounds[ci->region->name].x1 - constraint_region_bounds[ci->region->name].x0) / 2 + 1); ry = std::min(radius, (constraint_region_bounds[ci->region->name].y1 - constraint_region_bounds[ci->region->name].y0) / 2 + 1); } // Pick a random X and Y location within our search radius int nx = ctx->rng(2 * rx + 1) + std::max(cell_locs.at(ci->name).x - rx, 0); int ny = ctx->rng(2 * ry + 1) + std::max(cell_locs.at(ci->name).y - ry, 0); iter++; iter_at_radius++; if (iter >= (10 * (radius + 1))) { // No luck yet, increase radius radius = std::min(std::max(max_x, max_y), radius + 1); while (radius < std::max(max_x, max_y)) { // Keep increasing the radius until it will actually increase the number of cells we are // checking (e.g. BRAM and DSP will not be in all cols/rows), so we don't waste effort for (int x = std::max(0, cell_locs.at(ci->name).x - radius); x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) { if (x >= int(fb->size())) break; for (int y = std::max(0, cell_locs.at(ci->name).y - radius); y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) { if (y >= int(fb->at(x).size())) break; if (fb->at(x).at(y).size() > 0) goto notempty; } } radius = std::min(std::max(max_x, max_y), radius + 1); } notempty: iter_at_radius = 0; iter = 0; } // If our randomly chosen cooridnate is out of bounds; or points to a tile with no relevant bels; ignore // it if (nx < 0 || nx > max_x) continue; if (ny < 0 || ny > max_y) continue; if (nx >= int(fb->size())) continue; if (ny >= int(fb->at(nx).size())) continue; if (fb->at(nx).at(ny).empty()) continue; // The number of attempts to find a location to try int need_to_explore = 2 * radius; // If we have found at least one legal location; and made enough attempts; assume it's good enough and // finish if (iter_at_radius >= need_to_explore && bestBel != BelId()) { CellInfo *bound = ctx->getBoundBelCell(bestBel); if (bound != nullptr) { ctx->unbindBel(bound->bel); remaining.emplace(chain_size[bound->name], bound->name); } ctx->bindBel(bestBel, ci, STRENGTH_WEAK); placed = true; Loc loc = ctx->getBelLocation(bestBel); cell_locs[ci->name].x = loc.x; cell_locs[ci->name].y = loc.y; break; } if (ci->constr_children.empty() && !ci->constr_abs_z) { // The case where we have no relative constraints for (auto sz : fb->at(nx).at(ny)) { // Look through all bels in this tile; checking region constraint if applicable if (!ci->testRegion(sz)) continue; // Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets) // or occasionally trigger a tiebreaker if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { // Only rip up cells without constraints if (bound->isConstrained()) continue; ctx->unbindBel(bound->bel); } // Provisionally bind the bel ctx->bindBel(sz, ci, STRENGTH_WEAK); if (require_validity && !ctx->isBelLocationValid(sz)) { // New location is not legal; unbind the cell (and rebind the cell we ripped up if // applicable) ctx->unbindBel(sz); if (bound != nullptr) ctx->bindBel(sz, bound, STRENGTH_WEAK); } else if (iter_at_radius < need_to_explore) { // It's legal, but we haven't tried enough locations yet ctx->unbindBel(sz); if (bound != nullptr) ctx->bindBel(sz, bound, STRENGTH_WEAK); int input_len = 0; // Compute a fast input wirelength metric at this bel; and save if better than our last // try for (auto &port : ci->ports) { auto &p = port.second; if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr) continue; CellInfo *drv = p.net->driver.cell; auto drv_loc = cell_locs.find(drv->name); if (drv_loc == cell_locs.end()) continue; if (drv_loc->second.global) continue; input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny); } if (input_len < best_inp_len) { best_inp_len = input_len; bestBel = sz; } break; } else { // It's legal, and we've tried enough. Finish. if (bound != nullptr) remaining.emplace(chain_size[bound->name], bound->name); Loc loc = ctx->getBelLocation(sz); cell_locs[ci->name].x = loc.x; cell_locs[ci->name].y = loc.y; placed = true; break; } } } } else { // We do have relative constraints for (auto sz : fb->at(nx).at(ny)) { Loc loc = ctx->getBelLocation(sz); // Check that the absolute-z constraint is satisfied if applicable if (ci->constr_abs_z && loc.z != ci->constr_z) continue; // List of cells and their destination std::vector> targets; // List of bels we placed things at; and the cell that was there before if applicable std::vector> swaps_made; // List of (cell, new location) pairs to check std::queue> visit; // FIXME: this approach of having a visit queue is designed to deal with recursively chained // cells. But is this a case we really want to care about given the complexity it adds? Start by // considering the root cell at the root location visit.emplace(ci, loc); while (!visit.empty()) { CellInfo *vc = visit.front().first; NPNR_ASSERT(vc->bel == BelId()); Loc ploc = visit.front().second; visit.pop(); // Get the bel we're going to place this cell at BelId target = ctx->getBelByLocation(ploc); // Check it satisfies the region constraint if applicable if (!vc->testRegion(target)) goto fail; CellInfo *bound; // Check that the target bel exists and is of a suitable type if (target == BelId() || !ctx->isValidBelForCellType(vc->type, target)) goto fail; bound = ctx->getBoundBelCell(target); // Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain if (bound != nullptr) if (bound->isConstrained() || bound->belStrength > STRENGTH_WEAK) goto fail; targets.emplace_back(vc, target); for (auto child : vc->constr_children) { // For all the constrained children; compute the location we need to place them at and // add them to the queue visit.emplace(child, child->getConstrainedLoc(ploc)); } } // Actually perform the move; keeping track of the moves we make so we can revert them if needed for (auto &target : targets) { CellInfo *bound = ctx->getBoundBelCell(target.second); if (bound != nullptr) ctx->unbindBel(target.second); ctx->bindBel(target.second, target.first, STRENGTH_STRONG); swaps_made.emplace_back(target.second, bound); } // Check that the move we have made is legal for (auto &sm : swaps_made) { if (!ctx->isBelLocationValid(sm.first)) goto fail; } if (false) { fail: // If the move turned out to be illegal; revert all the moves we made for (auto &swap : swaps_made) { ctx->unbindBel(swap.first); if (swap.second != nullptr) ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK); } continue; } for (auto &target : targets) { Loc loc = ctx->getBelLocation(target.second); cell_locs[target.first->name].x = loc.x; cell_locs[target.first->name].y = loc.y; // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z); } for (auto &swap : swaps_made) { // Where we have ripped up cells; add them to the queue if (swap.second != nullptr) remaining.emplace(chain_size[swap.second->name], swap.second->name); } placed = true; break; } } } } auto endt = std::chrono::high_resolution_clock::now(); sl_time += std::chrono::duration(endt - startt).count(); } // Implementation of the cut-based spreading as described in the HeAP/SimPL papers template T limit_to_reg(Region *reg, T val, bool dir) { if (reg == nullptr) return val; int limit_low = dir ? constraint_region_bounds[reg->name].y0 : constraint_region_bounds[reg->name].x0; int limit_high = dir ? constraint_region_bounds[reg->name].y1 : constraint_region_bounds[reg->name].x1; return std::max(std::min(val, limit_high), limit_low); } struct ChainExtent { int x0, y0, x1, y1; }; struct SpreaderRegion { int id; int x0, y0, x1, y1; std::vector cells, bels; bool overused(float beta) const { for (size_t t = 0; t < cells.size(); t++) { if (bels.at(t) < 4) { if (cells.at(t) > bels.at(t)) return true; } else { if (cells.at(t) > beta * bels.at(t)) return true; } } return false; } }; class CutSpreader { public: CutSpreader(HeAPPlacer *p, const std::unordered_set &buckets) : p(p), ctx(p->ctx), buckets(buckets) { // Get fast BELs data for all buckets being Cut/Spread. size_t idx = 0; for (BelBucketId bucket : sorted(buckets)) { type_index[bucket] = idx; FastBels::FastBelsData *fast_bels; p->fast_bels.getBelsForBelBucket(bucket, &fast_bels); fb.push_back(fast_bels); ++idx; NPNR_ASSERT(fb.size() == idx); } } static int seq; void run() { auto startt = std::chrono::high_resolution_clock::now(); init(); find_overused_regions(); for (auto &r : regions) { if (merged_regions.count(r.id)) continue; #if 0 log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels); #endif } expand_regions(); std::queue> workqueue; #if 0 std::vector> orig; if (ctx->debug) for (auto c : p->solve_cells) orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy); #endif for (auto &r : regions) { if (merged_regions.count(r.id)) continue; #if 0 for (auto t : sorted(beltype)) { log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", t.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells.at(type_index.at(t)), r.bels.at(type_index.at(t))); } #endif workqueue.emplace(r.id, false); } while (!workqueue.empty()) { auto front = workqueue.front(); workqueue.pop(); auto &r = regions.at(front.first); if (std::all_of(r.cells.begin(), r.cells.end(), [](int x) { return x == 0; })) continue; auto res = cut_region(r, front.second); if (res) { workqueue.emplace(res->first, !front.second); workqueue.emplace(res->second, !front.second); } else { // Try the other dir, in case stuck in one direction only auto res2 = cut_region(r, !front.second); if (res2) { workqueue.emplace(res2->first, front.second); workqueue.emplace(res2->second, front.second); } } } #if 0 if (ctx->debug) { std::ofstream sp("spread" + std::to_string(seq) + ".csv"); for (size_t i = 0; i < p->solve_cells.size(); i++) { auto &c = p->solve_cells.at(i); if (c->type != beltype) continue; sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl; } std::ofstream oc("cells" + std::to_string(seq) + ".csv"); for (size_t y = 0; y <= p->max_y; y++) { for (size_t x = 0; x <= p->max_x; x++) { oc << cells_at_location.at(x).at(y).size() << ", "; } oc << std::endl; } ++seq; } #endif auto endt = std::chrono::high_resolution_clock::now(); p->cl_time += std::chrono::duration(endt - startt).count(); } private: HeAPPlacer *p; Context *ctx; std::unordered_set buckets; std::unordered_map type_index; std::vector>> occupancy; std::vector> groups; std::vector> chaines; std::map cell_extents; std::vector>> *> fb; std::vector regions; std::unordered_set merged_regions; // Cells at a location, sorted by real (not integer) x and y std::vector>> cells_at_location; int occ_at(int x, int y, int type) { return occupancy.at(x).at(y).at(type); } int bels_at(int x, int y, int type) { if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size())) return 0; return int(fb.at(type)->at(x).at(y).size()); } bool is_cell_fixed(const CellInfo &cell) const { return buckets.count(ctx->getBelBucketForCellType(cell.type)) == 0; } size_t cell_index(const CellInfo &cell) const { return type_index.at(ctx->getBelBucketForCellType(cell.type)); } void init() { occupancy.resize(p->max_x + 1, std::vector>(p->max_y + 1, std::vector(buckets.size(), 0))); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); cells_at_location.resize(p->max_x + 1, std::vector>(p->max_y + 1)); for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { for (int t = 0; t < int(buckets.size()); t++) { occupancy.at(x).at(y).at(t) = 0; } groups.at(x).at(y) = -1; chaines.at(x).at(y) = {x, y, x, y}; } auto set_chain_ext = [&](IdString cell, int x, int y) { if (!cell_extents.count(cell)) cell_extents[cell] = {x, y, x, y}; else { cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x); cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y); cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x); cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y); } }; for (auto &cell_loc : p->cell_locs) { IdString cell_name = cell_loc.first; const CellInfo &cell = *ctx->cells.at(cell_name); const CellLocation &loc = cell_loc.second; if (is_cell_fixed(cell)) { continue; } if (cell.belStrength > STRENGTH_STRONG) { continue; } occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++; // Compute ultimate extent of each chain root if (p->chain_root.count(cell_name)) { set_chain_ext(p->chain_root.at(cell_name)->name, loc.x, loc.y); } else if (!ctx->cells.at(cell_name)->constr_children.empty()) { set_chain_ext(cell_name, loc.x, loc.y); } } for (auto &cell_loc : p->cell_locs) { IdString cell_name = cell_loc.first; const CellInfo &cell = *ctx->cells.at(cell_name); const CellLocation &loc = cell_loc.second; if (is_cell_fixed(cell)) { continue; } // Transfer chain extents to the actual chains structure ChainExtent *ce = nullptr; if (p->chain_root.count(cell_name)) { ce = &(cell_extents.at(p->chain_root.at(cell_name)->name)); } else if (!ctx->cells.at(cell_name)->constr_children.empty()) { ce = &(cell_extents.at(cell_name)); } if (ce) { auto &lce = chaines.at(loc.x).at(loc.y); lce.x0 = std::min(lce.x0, ce->x0); lce.y0 = std::min(lce.y0, ce->y0); lce.x1 = std::max(lce.x1, ce->x1); lce.y1 = std::max(lce.y1, ce->y1); } } for (auto cell : p->solve_cells) { if (is_cell_fixed(*cell)) { continue; } cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); } } void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee) { // Prevent grow_region from recursing while doing this for (int x = mergee.x0; x <= mergee.x1; x++) { for (int y = mergee.y0; y <= mergee.y1; y++) { // log_info("%d %d\n", groups.at(x).at(y), mergee.id); NPNR_ASSERT(groups.at(x).at(y) == mergee.id); groups.at(x).at(y) = merged.id; for (size_t t = 0; t < buckets.size(); t++) { merged.cells.at(t) += occ_at(x, y, t); merged.bels.at(t) += bels_at(x, y, t); } } } merged_regions.insert(mergee.id); grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); } void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false) { // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) return; int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1; r.x0 = std::min(r.x0, x0); r.y0 = std::min(r.y0, y0); r.x1 = std::max(r.x1, x1); r.y1 = std::max(r.y1, y1); auto process_location = [&](int x, int y) { // Merge with any overlapping regions if (groups.at(x).at(y) == -1) { for (size_t t = 0; t < buckets.size(); t++) { r.bels.at(t) += bels_at(x, y, t); r.cells.at(t) += occ_at(x, y, t); } } if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) merge_regions(r, regions.at(groups.at(x).at(y))); groups.at(x).at(y) = r.id; // Grow to cover any chains auto &chaine = chaines.at(x).at(y); grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1); }; for (int x = r.x0; x < old_x0; x++) for (int y = r.y0; y <= r.y1; y++) process_location(x, y); for (int x = old_x1 + 1; x <= x1; x++) for (int y = r.y0; y <= r.y1; y++) process_location(x, y); for (int y = r.y0; y < old_y0; y++) for (int x = r.x0; x <= r.x1; x++) process_location(x, y); for (int y = old_y1 + 1; y <= r.y1; y++) for (int x = r.x0; x <= r.x1; x++) process_location(x, y); } void find_overused_regions() { for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { // Either already in a group, or not overutilised. Ignore if (groups.at(x).at(y) != -1) continue; bool overutilised = false; for (size_t t = 0; t < buckets.size(); t++) { if (occ_at(x, y, t) > bels_at(x, y, t)) { overutilised = true; break; } } if (!overutilised) continue; // log_info("%d %d %d\n", x, y, occ_at(x, y)); int id = int(regions.size()); groups.at(x).at(y) = id; SpreaderRegion reg; reg.id = id; reg.x0 = reg.x1 = x; reg.y0 = reg.y1 = y; for (size_t t = 0; t < buckets.size(); t++) { reg.bels.push_back(bels_at(x, y, t)); reg.cells.push_back(occ_at(x, y, t)); } // Make sure we cover carries, etc grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); bool expanded = true; while (expanded) { expanded = false; // Keep trying expansion in x and y, until we find no over-occupancy cells // or hit grouped cells // First try expanding in x if (reg.x1 < p->max_x) { bool over_occ_x = false; for (int y1 = reg.y0; y1 <= reg.y1; y1++) { for (size_t t = 0; t < buckets.size(); t++) { if (occ_at(reg.x1 + 1, y1, t) > bels_at(reg.x1 + 1, y1, t)) { over_occ_x = true; break; } } } if (over_occ_x) { expanded = true; grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); } } if (reg.y1 < p->max_y) { bool over_occ_y = false; for (int x1 = reg.x0; x1 <= reg.x1; x1++) { for (size_t t = 0; t < buckets.size(); t++) { if (occ_at(x1, reg.y1 + 1, t) > bels_at(x1, reg.y1 + 1, t)) { over_occ_y = true; break; } } } if (over_occ_y) { expanded = true; grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); } } } regions.push_back(reg); } } void expand_regions() { std::queue overu_regions; float beta = p->cfg.beta; for (auto &r : regions) { if (!merged_regions.count(r.id) && r.overused(beta)) overu_regions.push(r.id); } while (!overu_regions.empty()) { int rid = overu_regions.front(); overu_regions.pop(); if (merged_regions.count(rid)) continue; auto ® = regions.at(rid); while (reg.overused(beta)) { bool changed = false; for (int j = 0; j < p->cfg.spread_scale_x; j++) { if (reg.x0 > 0) { grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); changed = true; if (!reg.overused(beta)) break; } if (reg.x1 < p->max_x) { grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); changed = true; if (!reg.overused(beta)) break; } } for (int j = 0; j < p->cfg.spread_scale_y; j++) { if (reg.y0 > 0) { grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); changed = true; if (!reg.overused(beta)) break; } if (reg.y1 < p->max_y) { grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); changed = true; if (!reg.overused(beta)) break; } } if (!changed) { for (auto bucket : sorted(buckets)) { if (reg.cells > reg.bels) { IdString bucket_name = ctx->getBelBucketName(bucket); log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, reg.y1, reg.cells.at(type_index.at(bucket)), bucket_name.c_str(ctx)); } } break; } } } } // Implementation of the recursive cut-based spreading as described in the HeAP paper // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir std::vector cut_cells; boost::optional> cut_region(SpreaderRegion &r, bool dir) { cut_cells.clear(); auto &cal = cells_at_location; int total_cells = 0, total_bels = 0; for (int x = r.x0; x <= r.x1; x++) { for (int y = r.y0; y <= r.y1; y++) { std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); for (size_t t = 0; t < buckets.size(); t++) total_bels += bels_at(x, y, t); } } for (auto &cell : cut_cells) { total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; } std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); }); if (cut_cells.size() < 2) return {}; // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut int pivot_cells = 0; int pivot = 0; for (auto &cell : cut_cells) { pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; if (pivot_cells >= total_cells / 2) break; pivot++; } if (pivot >= int(cut_cells.size())) { pivot = int(cut_cells.size()) - 1; } // log_info("orig pivot %d/%d lc %d rc %d\n", pivot, int(cut_cells.size()), pivot_cells, total_cells - // pivot_cells); // Find the clearance required either side of the pivot int clearance_l = 0, clearance_r = 0; for (size_t i = 0; i < cut_cells.size(); i++) { int size; if (cell_extents.count(cut_cells.at(i)->name)) { auto &ce = cell_extents.at(cut_cells.at(i)->name); size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1); } else { size = 1; } if (int(i) < pivot) clearance_l = std::max(clearance_l, size); else clearance_r = std::max(clearance_r, size); } // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains // still fit // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any // bels of the appropriate type int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1; while (trimmed_l < (dir ? r.y1 : r.x1)) { bool have_bels = false; for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) { for (size_t t = 0; t < buckets.size(); t++) { if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i, t) > 0) { have_bels = true; break; } } } if (have_bels) break; trimmed_l++; } while (trimmed_r > (dir ? r.y0 : r.x0)) { bool have_bels = false; for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) { for (size_t t = 0; t < buckets.size(); t++) { if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i, t) > 0) { have_bels = true; break; } } } if (have_bels) break; trimmed_r--; } // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst // meeting the clearance requirements for any large macros std::vector left_cells_v(buckets.size(), 0), right_cells_v(buckets.size(), 0); std::vector left_bels_v(buckets.size(), 0), right_bels_v(r.bels); for (int i = 0; i <= pivot; i++) left_cells_v.at(cell_index(*cut_cells.at(i))) += p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; for (int i = pivot + 1; i < int(cut_cells.size()); i++) right_cells_v.at(cell_index(*cut_cells.at(i))) += p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; int best_tgt_cut = -1; double best_deltaU = std::numeric_limits::max(); // std::pair target_cut_bels; std::vector slither_bels(buckets.size(), 0); for (int i = trimmed_l; i <= trimmed_r; i++) { for (size_t t = 0; t < buckets.size(); t++) slither_bels.at(t) = 0; for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { for (size_t t = 0; t < buckets.size(); t++) slither_bels.at(t) += dir ? bels_at(j, i, t) : bels_at(i, j, t); } for (size_t t = 0; t < buckets.size(); t++) { left_bels_v.at(t) += slither_bels.at(t); right_bels_v.at(t) -= slither_bels.at(t); } if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { // Solution is potentially valid double aU = 0; for (size_t t = 0; t < buckets.size(); t++) aU += (left_cells_v.at(t) + right_cells_v.at(t)) * std::abs(double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1))); if (aU < best_deltaU) { best_deltaU = aU; best_tgt_cut = i; } } } if (best_tgt_cut == -1) return {}; // left_bels = target_cut_bels.first; // right_bels = target_cut_bels.second; for (size_t t = 0; t < buckets.size(); t++) { left_bels_v.at(t) = 0; right_bels_v.at(t) = 0; } for (int x = r.x0; x <= (dir ? r.x1 : best_tgt_cut); x++) for (int y = r.y0; y <= (dir ? best_tgt_cut : r.y1); y++) { for (size_t t = 0; t < buckets.size(); t++) { left_bels_v.at(t) += bels_at(x, y, t); } } for (int x = dir ? r.x0 : (best_tgt_cut + 1); x <= r.x1; x++) for (int y = dir ? (best_tgt_cut + 1) : r.y0; y <= r.y1; y++) { for (size_t t = 0; t < buckets.size(); t++) { right_bels_v.at(t) += bels_at(x, y, t); } } if (std::accumulate(left_bels_v.begin(), left_bels_v.end(), 0) == 0 || std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0) == 0) return {}; // Perturb the source cut to eliminate overutilisation auto is_part_overutil = [&](bool r) { double delta = 0; for (size_t t = 0; t < left_cells_v.size(); t++) { delta += double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1)); } return r ? delta < 0 : delta > 0; }; while (pivot > 0 && is_part_overutil(false)) { auto &move_cell = cut_cells.at(pivot); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; left_cells_v.at(cell_index(*cut_cells.at(pivot))) -= size; right_cells_v.at(cell_index(*cut_cells.at(pivot))) += size; pivot--; } while (pivot < int(cut_cells.size()) - 1 && is_part_overutil(true)) { auto &move_cell = cut_cells.at(pivot + 1); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; left_cells_v.at(cell_index(*cut_cells.at(pivot))) += size; right_cells_v.at(cell_index(*cut_cells.at(pivot))) -= size; pivot++; } // Split regions into bins, and then spread cells by linear interpolation within those bins auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { int N = cells_end - cells_start; if (N <= 2) { for (int i = cells_start; i < cells_end; i++) { auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy : p->cell_locs.at(cut_cells.at(i)->name).rawx; pos = area_l + i * ((area_r - area_l) / N); } return; } // Split region into up to 10 (K) bins int K = std::min(N, 10); std::vector> bin_bounds; // [(cell start, area start)] bin_bounds.emplace_back(cells_start, area_l); for (int i = 1; i < K; i++) bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K); bin_bounds.emplace_back(cells_end, area_r + 0.99); for (int i = 0; i < K; i++) { auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left); for (int j = bl.first; j < br.first; j++) { Region *cr = cut_cells.at(j)->region; if (cr != nullptr) { // Limit spreading bounds to constraint region; if applicable double brsc = p->limit_to_reg(cr, br.second, dir); double blsc = p->limit_to_reg(cr, bl.second, dir); double mr = (brsc - blsc) / std::max(0.00001, orig_right - orig_left); auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; NPNR_ASSERT(pos >= orig_left && pos <= orig_right); pos = blsc + mr * (pos - orig_left); } else { auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; NPNR_ASSERT(pos >= orig_left && pos <= orig_right); pos = bl.second + m * (pos - orig_left); } } } }; spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut); spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r); // Update various data structures for (int x = r.x0; x <= r.x1; x++) for (int y = r.y0; y <= r.y1; y++) { cells_at_location.at(x).at(y).clear(); } for (auto cell : cut_cells) { auto &cl = p->cell_locs.at(cell->name); cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx))); cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy))); cells_at_location.at(cl.x).at(cl.y).push_back(cell); } SpreaderRegion rl, rr; rl.id = int(regions.size()); rl.x0 = r.x0; rl.y0 = r.y0; rl.x1 = dir ? r.x1 : best_tgt_cut; rl.y1 = dir ? best_tgt_cut : r.y1; rl.cells = left_cells_v; rl.bels = left_bels_v; rr.id = int(regions.size()) + 1; rr.x0 = dir ? r.x0 : (best_tgt_cut + 1); rr.y0 = dir ? (best_tgt_cut + 1) : r.y0; rr.x1 = r.x1; rr.y1 = r.y1; rr.cells = right_cells_v; rr.bels = right_bels_v; regions.push_back(rl); regions.push_back(rr); for (int x = rl.x0; x <= rl.x1; x++) for (int y = rl.y0; y <= rl.y1; y++) groups.at(x).at(y) = rl.id; for (int x = rr.x0; x <= rr.x1; x++) for (int y = rr.y0; y <= rr.y1; y++) groups.at(x).at(y) = rr.id; return std::make_pair(rl.id, rr.id); }; }; typedef decltype(CellInfo::udata) cell_udata_t; cell_udata_t dont_solve = std::numeric_limits::max(); }; int HeAPPlacer::CutSpreader::seq = 0; bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); } PlacerHeapCfg::PlacerHeapCfg(Context *ctx) { alpha = ctx->setting("placerHeap/alpha"); beta = ctx->setting("placerHeap/beta"); criticalityExponent = ctx->setting("placerHeap/criticalityExponent"); timingWeight = ctx->setting("placerHeap/timingWeight"); timing_driven = ctx->setting("timing_driven"); solverTolerance = 1e-5; placeAllAtOnce = false; hpwl_scale_x = 1; hpwl_scale_y = 1; spread_scale_x = 1; spread_scale_y = 1; } NEXTPNR_NAMESPACE_END #else #include "log.h" #include "nextpnr.h" #include "placer_heap.h" NEXTPNR_NAMESPACE_BEGIN bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { log_error("nextpnr was built without the HeAP placer\n"); return false; } PlacerHeapCfg::PlacerHeapCfg(Context *ctx) {} NEXTPNR_NAMESPACE_END #endif