From 7bda6f15a9c906f54408b8b35915113e711c887c Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:25:20 +0000 Subject: placer1: Allow scaling HPWL differently in each direction Signed-off-by: David Shah --- common/placer1.cc | 19 +++++++++++++------ common/placer1.h | 1 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 96424957..9c1eaba4 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -68,7 +68,10 @@ class SAPlacer int x0 = 0, x1 = 0, y0 = 0, y1 = 0; // Number of cells at each extremity int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; - wirelen_t hpwl() const { return wirelen_t((x1 - x0) + (y1 - y0)); } + wirelen_t hpwl(const Placer1Cfg &cfg) const + { + return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); + } }; public: @@ -689,8 +692,10 @@ class SAPlacer int dx = diameter, dy = diameter; if (cell->region != nullptr && cell->region->constr_bels) { - dx = std::min(diameter, (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1); - dy = std::min(diameter, (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1); + dx = std::min(cfg.hpwl_scale_x * diameter, + (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1); + dy = std::min(cfg.hpwl_scale_y * diameter, + (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1); // Clamp location to within bounds curr_loc.x = std::max(region_bounds[cell->region->name].x0, curr_loc.x); curr_loc.x = std::min(region_bounds[cell->region->name].x1, curr_loc.x); @@ -820,7 +825,7 @@ class SAPlacer { wirelen_t cost = 0; for (const auto &net : net_bounds) - cost += net.hpwl(); + cost += net.hpwl(cfg); return cost; } @@ -1061,10 +1066,10 @@ class SAPlacer } for (const auto &bc : md.bounds_changed_nets_x) - md.wirelen_delta += md.new_net_bounds[bc].hpwl() - net_bounds[bc].hpwl(); + md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg); for (const auto &bc : md.bounds_changed_nets_y) if (md.already_bounds_changed_x[bc] == MoveChangeData::NO_CHANGE) - md.wirelen_delta += md.new_net_bounds[bc].hpwl() - net_bounds[bc].hpwl(); + md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg); if (cfg.timing_driven) { for (const auto &tc : md.changed_arcs) { @@ -1145,6 +1150,8 @@ Placer1Cfg::Placer1Cfg(Context *ctx) timingFanoutThresh = std::numeric_limits::max(); timing_driven = ctx->setting("timing_driven"); slack_redist_iter = ctx->setting("slack_redist_iter"); + hpwl_scale_x = 1; + hpwl_scale_y = 1; } bool placer1(Context *ctx, Placer1Cfg cfg) diff --git a/common/placer1.h b/common/placer1.h index 1356db3e..e803d592 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -34,6 +34,7 @@ struct Placer1Cfg int timingFanoutThresh; bool timing_driven; int slack_redist_iter; + int hpwl_scale_x, hpwl_scale_y; }; extern bool placer1(Context *ctx, Placer1Cfg cfg); -- cgit v1.2.3 From 1ff060c5ad066b802db15fb4b8a2270073ec8cf5 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:28:11 +0000 Subject: HeAP: Make solver tolerance arch-configurable Signed-off-by: David Shah --- common/placer_heap.cc | 7 ++++--- common/placer_heap.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index f336f6e4..46b572c6 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -97,7 +97,7 @@ template struct EquationSystem void add_rhs(int row, T val) { rhs[row] += val; } - void solve(std::vector &x) + void solve(std::vector &x, float tolerance) { using namespace Eigen; if (x.empty()) @@ -123,7 +123,7 @@ template struct EquationSystem vb[i] = rhs.at(i); ConjugateGradient, Lower | Upper> solver; - solver.setTolerance(1e-5); + solver.setTolerance(tolerance); VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx); for (int i = 0; i < int(x.size()); i++) x.at(i) = xr[i]; @@ -712,7 +712,7 @@ class HeAPPlacer auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); - es.solve(vals); + es.solve(vals, cfg.solverTolerance); for (size_t i = 0; i < vals.size(); i++) if (yaxis) { cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); @@ -1610,6 +1610,7 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx) criticalityExponent = ctx->setting("placerHeap/criticalityExponent", 2); timingWeight = ctx->setting("placerHeap/timingWeight", 10); timing_driven = ctx->setting("timing_driven"); + solverTolerance = 1e-5; } NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.h b/common/placer_heap.h index a018aef8..94ac5229 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -39,6 +39,7 @@ struct PlacerHeapCfg float criticalityExponent; float timingWeight; bool timing_driven; + float solverTolerance; std::unordered_set ioBufTypes; }; -- cgit v1.2.3 From d1f5cdcb93cb11c22e696e5885b5c4f576e47b28 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:40:23 +0000 Subject: HeAP: Improve handling of heterogeneous slice arches Signed-off-by: David Shah --- common/placer_heap.cc | 294 ++++++++++++++++++++++++++++++++++---------------- common/placer_heap.h | 5 + 2 files changed, 205 insertions(+), 94 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 46b572c6..7d1f2033 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -190,6 +190,13 @@ class HeAPPlacer break; } + if (cfg.placeAllAtOnce) { + // Never want to deal with LUTs, FFs, MUXFxs seperately, + // for now disable all single-cell-type runs and only have heteregenous + // runs + heap_runs.clear(); + } + heap_runs.push_back(all_celltypes); // The main HeAP placer loop log_info("Running main analytical placer.\n"); @@ -218,8 +225,14 @@ class HeAPPlacer solved_hpwl = total_hpwl(); update_all_chains(); + + for (const auto &group : cfg.cellGroups) + CutSpreader(this, group).run(); + for (auto type : sorted(run)) - CutSpreader(this, type).run(); + if (std::all_of(cfg.cellGroups.begin(), cfg.cellGroups.end(), + [type](const std::unordered_set &grp) { return !grp.count(type); })) + CutSpreader(this, {type}).run(); update_all_chains(); spread_hpwl = total_hpwl(); @@ -577,7 +590,9 @@ class HeAPPlacer { const auto &base = cell_locs[cell->name]; for (auto child : cell->constr_children) { - chain_size[root->name]++; + // FIXME: Improve handling of heterogeneous chains + if (child->type == root->type) + chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) cell_locs[child->name].x = std::min(max_x, base.x + child->constr_x); else @@ -1033,22 +1048,33 @@ class HeAPPlacer { int id; int x0, y0, x1, y1; - int cells, bels; + std::vector cells, bels; bool overused() const { - if (bels < 4) - return cells > bels; - else - return cells > beta * bels; + for (size_t t = 0; t < cells.size(); t++) { + if (bels.at(t) < 4) { + if (cells.at(t) > bels.at(t)) + return true; + } else { + if (cells.at(t) > beta * bels.at(t)) + return true; + } + } + return false; } }; class CutSpreader { public: - CutSpreader(HeAPPlacer *p, IdString beltype) - : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) + CutSpreader(HeAPPlacer *p, const std::unordered_set &beltype) : p(p), ctx(p->ctx), beltype(beltype) { + int idx = 0; + for (IdString type : sorted(beltype)) { + type_index[type] = idx; + fb.emplace_back(&(p->fast_bels.at(std::get<0>(p->bel_types.at(type))))); + ++idx; + } } static int seq; void run() @@ -1076,8 +1102,11 @@ class HeAPPlacer if (merged_regions.count(r.id)) continue; #if 0 - log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - r.bels); + for (auto t : sorted(beltype)) { + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", t.c_str(ctx), r.x0, r.y0, r.x1, r.y1, + r.cells.at(type_index.at(t)), r.bels.at(type_index.at(t))); + } + #endif workqueue.emplace(r.id, false); // cut_region(r, false); @@ -1086,7 +1115,7 @@ class HeAPPlacer auto front = workqueue.front(); workqueue.pop(); auto &r = regions.at(front.first); - if (r.cells == 0) + if (std::all_of(r.cells.begin(), r.cells.end(), [](int x) { return x == 0; })) continue; auto res = cut_region(r, front.second); if (res) { @@ -1128,37 +1157,41 @@ class HeAPPlacer private: HeAPPlacer *p; Context *ctx; - IdString beltype; - std::vector> occupancy; + std::unordered_set beltype; + std::unordered_map type_index; + std::vector>> occupancy; std::vector> groups; std::vector> chaines; std::map cell_extents; - std::vector>> &fb; + std::vector>> *> fb; std::vector regions; std::unordered_set merged_regions; // Cells at a location, sorted by real (not integer) x and y std::vector>> cells_at_location; - int occ_at(int x, int y) { return occupancy.at(x).at(y); } + int occ_at(int x, int y, int type) { return occupancy.at(x).at(y).at(type); } - int bels_at(int x, int y) + int bels_at(int x, int y, int type) { - if (x >= int(fb.size()) || y >= int(fb.at(x).size())) + if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size())) return 0; - return int(fb.at(x).at(y).size()); + return int(fb.at(type)->at(x).at(y).size()); } void init() { - occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); + occupancy.resize(p->max_x + 1, + std::vector>(p->max_y + 1, std::vector(beltype.size(), 0))); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); cells_at_location.resize(p->max_x + 1, std::vector>(p->max_y + 1)); for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { - occupancy.at(x).at(y) = 0; + for (int t = 0; t < int(beltype.size()); t++) { + occupancy.at(x).at(y).at(t) = 0; + } groups.at(x).at(y) = -1; chaines.at(x).at(y) = {x, y, x, y}; } @@ -1175,11 +1208,11 @@ class HeAPPlacer }; for (auto &cell : p->cell_locs) { - if (ctx->cells.at(cell.first)->type != beltype) + if (!beltype.count(ctx->cells.at(cell.first)->type)) continue; if (ctx->cells.at(cell.first)->belStrength > STRENGTH_STRONG) continue; - occupancy.at(cell.second.x).at(cell.second.y)++; + occupancy.at(cell.second.x).at(cell.second.y).at(type_index.at(ctx->cells.at(cell.first)->type))++; // Compute ultimate extent of each chain root if (p->chain_root.count(cell.first)) { set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y); @@ -1188,7 +1221,7 @@ class HeAPPlacer } } for (auto &cell : p->cell_locs) { - if (ctx->cells.at(cell.first)->type != beltype) + if (!beltype.count(ctx->cells.at(cell.first)->type)) continue; // Transfer chain extents to the actual chaines structure ChainExtent *ce = nullptr; @@ -1205,7 +1238,7 @@ class HeAPPlacer } } for (auto cell : p->solve_cells) { - if (cell->type != beltype) + if (!beltype.count(cell->type)) continue; cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); } @@ -1218,8 +1251,10 @@ class HeAPPlacer // log_info("%d %d\n", groups.at(x).at(y), mergee.id); NPNR_ASSERT(groups.at(x).at(y) == mergee.id); groups.at(x).at(y) = merged.id; - merged.cells += occ_at(x, y); - merged.bels += bels_at(x, y); + for (size_t t = 0; t < beltype.size(); t++) { + merged.cells.at(t) += occ_at(x, y, t); + merged.bels.at(t) += bels_at(x, y, t); + } } merged_regions.insert(mergee.id); grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); @@ -1239,8 +1274,10 @@ class HeAPPlacer auto process_location = [&](int x, int y) { // Merge with any overlapping regions if (groups.at(x).at(y) == -1) { - r.bels += bels_at(x, y); - r.cells += occ_at(x, y); + for (int t = 0; t < int(beltype.size()); t++) { + r.bels.at(t) += bels_at(x, y, t); + r.cells.at(t) += occ_at(x, y, t); + } } if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) merge_regions(r, regions.at(groups.at(x).at(y))); @@ -1268,7 +1305,16 @@ class HeAPPlacer for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { // Either already in a group, or not overutilised. Ignore - if (groups.at(x).at(y) != -1 || (occ_at(x, y) <= bels_at(x, y))) + if (groups.at(x).at(y) != -1) + continue; + bool overutilised = false; + for (size_t t = 0; t < beltype.size(); t++) { + if (occ_at(x, y, t) > bels_at(x, y, t)) { + overutilised = true; + break; + } + } + if (!overutilised) continue; // log_info("%d %d %d\n", x, y, occ_at(x, y)); int id = int(regions.size()); @@ -1277,8 +1323,10 @@ class HeAPPlacer reg.id = id; reg.x0 = reg.x1 = x; reg.y0 = reg.y1 = y; - reg.bels = bels_at(x, y); - reg.cells = occ_at(x, y); + for (size_t t = 0; t < beltype.size(); t++) { + reg.bels.push_back(bels_at(x, y, t)); + reg.cells.push_back(occ_at(x, y, t)); + } // Make sure we cover carries, etc grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); @@ -1292,11 +1340,13 @@ class HeAPPlacer if (reg.x1 < p->max_x) { bool over_occ_x = false; for (int y1 = reg.y0; y1 <= reg.y1; y1++) { - if (occ_at(reg.x1 + 1, y1) > bels_at(reg.x1 + 1, y1)) { - // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1), - // bels_at(reg.x1 + 1, y1)); - over_occ_x = true; - break; + for (size_t t = 0; t < beltype.size(); t++) { + if (occ_at(reg.x1 + 1, y1, t) > bels_at(reg.x1 + 1, y1, t)) { + // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1), + // bels_at(reg.x1 + 1, y1)); + over_occ_x = true; + break; + } } } if (over_occ_x) { @@ -1308,11 +1358,13 @@ class HeAPPlacer if (reg.y1 < p->max_y) { bool over_occ_y = false; for (int x1 = reg.x0; x1 <= reg.x1; x1++) { - if (occ_at(x1, reg.y1 + 1) > bels_at(x1, reg.y1 + 1)) { - // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1), - // bels_at(x1, reg.y1 + 1)); - over_occ_y = true; - break; + for (size_t t = 0; t < beltype.size(); t++) { + if (occ_at(x1, reg.y1 + 1, t) > bels_at(x1, reg.y1 + 1, t)) { + // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1), + // bels_at(x1, reg.y1 + 1)); + over_occ_y = true; + break; + } } } if (over_occ_y) { @@ -1340,17 +1392,20 @@ class HeAPPlacer auto ® = regions.at(rid); while (reg.overused()) { bool changed = false; - if (reg.x0 > 0) { - grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); - changed = true; - if (!reg.overused()) - break; - } - if (reg.x1 < p->max_x) { - grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); - changed = true; - if (!reg.overused()) - break; + // 2 x units for every 1 y unit to account for INT gaps between CLBs + for (int j = 0; j < 2; j++) { + if (reg.x0 > 0) { + grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.x1 < p->max_x) { + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } } if (reg.y0 > 0) { grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); @@ -1365,11 +1420,12 @@ class HeAPPlacer break; } if (!changed) { - if (reg.cells > reg.bels) - log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, - reg.x1, reg.y1, reg.cells, beltype.c_str(ctx)); - else - break; + for (auto bt : sorted(beltype)) { + if (reg.cells > reg.bels) + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, + reg.x1, reg.y1, reg.cells.at(type_index.at(bt)), bt.c_str(ctx)); + } + break; } } } @@ -1388,7 +1444,8 @@ class HeAPPlacer for (int x = r.x0; x <= r.x1; x++) { for (int y = r.y0; y <= r.y1; y++) { std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); - total_bels += bels_at(x, y); + for (size_t t = 0; t < beltype.size(); t++) + total_bels += bels_at(x, y, t); } } for (auto &cell : cut_cells) { @@ -1410,9 +1467,11 @@ class HeAPPlacer break; pivot++; } - if (pivot == int(cut_cells.size())) + if (pivot >= int(cut_cells.size())) { pivot = int(cut_cells.size()) - 1; - // log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); + } + // log_info("orig pivot %d/%d lc %d rc %d\n", pivot, int(cut_cells.size()), pivot_cells, total_cells - + // pivot_cells); // Find the clearance required either side of the pivot int clearance_l = 0, clearance_r = 0; @@ -1438,10 +1497,11 @@ class HeAPPlacer while (trimmed_l < (dir ? r.y1 : r.x1)) { bool have_bels = false; for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) - if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i) > 0) { - have_bels = true; - break; - } + for (size_t t = 0; t < beltype.size(); t++) + if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i, t) > 0) { + have_bels = true; + break; + } if (have_bels) break; trimmed_l++; @@ -1449,10 +1509,11 @@ class HeAPPlacer while (trimmed_r > (dir ? r.y0 : r.x0)) { bool have_bels = false; for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) - if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i) > 0) { - have_bels = true; - break; - } + for (size_t t = 0; t < beltype.size(); t++) + if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i, t) > 0) { + have_bels = true; + break; + } if (have_bels) break; trimmed_r--; @@ -1462,50 +1523,94 @@ class HeAPPlacer return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst // meeting the clearance requirements for any large macros - int left_cells = pivot_cells, right_cells = total_cells - pivot_cells; - int left_bels = 0, right_bels = total_bels; + std::vector left_cells_v(beltype.size(), 0), right_cells_v(beltype.size(), 0); + std::vector left_bels_v(beltype.size(), 0), right_bels_v(r.bels); + for (int i = 0; i <= pivot; i++) + left_cells_v.at(type_index.at(cut_cells.at(i)->type)) += + p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; + for (int i = pivot + 1; i < int(cut_cells.size()); i++) + right_cells_v.at(type_index.at(cut_cells.at(i)->type)) += + p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; + int best_tgt_cut = -1; double best_deltaU = std::numeric_limits::max(); - std::pair target_cut_bels; + // std::pair target_cut_bels; + std::vector slither_bels(beltype.size(), 0); for (int i = trimmed_l; i <= trimmed_r; i++) { - int slither_bels = 0; + for (size_t t = 0; t < beltype.size(); t++) + slither_bels.at(t) = 0; for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { - slither_bels += dir ? bels_at(j, i) : bels_at(i, j); + for (size_t t = 0; t < beltype.size(); t++) + slither_bels.at(t) += dir ? bels_at(j, i, t) : bels_at(i, j, t); } - left_bels += slither_bels; - right_bels -= slither_bels; + for (size_t t = 0; t < beltype.size(); t++) { + left_bels_v.at(t) += slither_bels.at(t); + right_bels_v.at(t) -= slither_bels.at(t); + } + if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { // Solution is potentially valid - double aU = - std::abs(double(left_cells) / double(left_bels) - double(right_cells) / double(right_bels)); + double aU = 0; + for (size_t t = 0; t < beltype.size(); t++) + aU += (left_cells_v.at(t) + right_cells_v.at(t)) * + std::abs(double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - + double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1))); if (aU < best_deltaU) { best_deltaU = aU; best_tgt_cut = i; - target_cut_bels = std::make_pair(left_bels, right_bels); } } } if (best_tgt_cut == -1) return {}; - left_bels = target_cut_bels.first; - right_bels = target_cut_bels.second; - // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, - // right_cells, right_bels); + // left_bels = target_cut_bels.first; + // right_bels = target_cut_bels.second; + for (size_t t = 0; t < beltype.size(); t++) { + left_bels_v.at(t) = 0; + right_bels_v.at(t) = 0; + } + for (int x = r.x0; x <= (dir ? r.x1 : best_tgt_cut); x++) + for (int y = r.y0; y <= (dir ? best_tgt_cut : r.y1); y++) { + for (size_t t = 0; t < beltype.size(); t++) { + left_bels_v.at(t) += bels_at(x, y, t); + } + } + for (int x = dir ? r.x0 : (best_tgt_cut + 1); x <= r.x1; x++) + for (int y = dir ? (best_tgt_cut + 1) : r.y0; y <= r.y1; y++) { + for (size_t t = 0; t < beltype.size(); t++) { + right_bels_v.at(t) += bels_at(x, y, t); + } + } + if (std::accumulate(left_bels_v.begin(), left_bels_v.end(), 0) == 0 || + std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0) == 0) + return {}; + // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, + // std::accumulate(left_cells_v.begin(), left_cells_v.end(), 0), std::accumulate(left_bels_v.begin(), + // left_bels_v.end(), 0), + // std::accumulate(right_cells_v.begin(), right_cells_v.end(), 0), + // std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0)); // Peturb the source cut to eliminate overutilisation - while (pivot > 0 && (double(left_cells) / double(left_bels) > double(right_cells) / double(right_bels))) { + auto is_part_overutil = [&](bool r) { + double delta = 0; + for (size_t t = 0; t < left_cells_v.size(); t++) { + delta += double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - + double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1)); + } + return r ? delta < 0 : delta > 0; + }; + while (pivot > 0 && is_part_overutil(false)) { auto &move_cell = cut_cells.at(pivot); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; - left_cells -= size; - right_cells += size; + left_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) -= size; + right_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) += size; pivot--; } - while (pivot < int(cut_cells.size()) - 1 && - (double(left_cells) / double(left_bels) < double(right_cells) / double(right_bels))) { + while (pivot < int(cut_cells.size()) - 1 && is_part_overutil(true)) { auto &move_cell = cut_cells.at(pivot + 1); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; - left_cells += size; - right_cells -= size; + left_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) += size; + right_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) -= size; pivot++; } // log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, @@ -1577,15 +1682,15 @@ class HeAPPlacer rl.y0 = r.y0; rl.x1 = dir ? r.x1 : best_tgt_cut; rl.y1 = dir ? best_tgt_cut : r.y1; - rl.cells = left_cells; - rl.bels = left_bels; + rl.cells = left_cells_v; + rl.bels = left_bels_v; rr.id = int(regions.size()) + 1; rr.x0 = dir ? r.x0 : (best_tgt_cut + 1); rr.y0 = dir ? (best_tgt_cut + 1) : r.y0; rr.x1 = r.x1; rr.y1 = r.y1; - rr.cells = right_cells; - rr.bels = right_bels; + rr.cells = right_cells_v; + rr.bels = right_bels_v; regions.push_back(rl); regions.push_back(rr); for (int x = rl.x0; x <= rl.x1; x++) @@ -1611,6 +1716,7 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx) timingWeight = ctx->setting("placerHeap/timingWeight", 10); timing_driven = ctx->setting("timing_driven"); solverTolerance = 1e-5; + placeAllAtOnce = false; } NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.h b/common/placer_heap.h index 94ac5229..4bcf71e8 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -40,8 +40,13 @@ struct PlacerHeapCfg float timingWeight; bool timing_driven; float solverTolerance; + bool placeAllAtOnce; + // These cell types will be randomly locked to prevent singular matrices std::unordered_set ioBufTypes; + // These cell types are part of the same unit (e.g. slices split into + // components) so will always be spread together + std::vector> cellGroups; }; extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg); -- cgit v1.2.3 From 7db1484c75fe2d33ba2b39c3360b3a171ed16cf4 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:45:01 +0000 Subject: HeAP: Make beta configurable Signed-off-by: David Shah --- common/placer_heap.cc | 17 +++++++++-------- common/placer_heap.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 7d1f2033..d7b92fb1 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -1028,7 +1028,6 @@ class HeAPPlacer sl_time += std::chrono::duration(endt - startt).count(); } // Implementation of the cut-based spreading as described in the HeAP/SimPL papers - static constexpr float beta = 0.9; template T limit_to_reg(Region *reg, T val, bool dir) { @@ -1049,7 +1048,7 @@ class HeAPPlacer int id; int x0, y0, x1, y1; std::vector cells, bels; - bool overused() const + bool overused(float beta) const { for (size_t t = 0; t < cells.size(); t++) { if (bels.at(t) < 4) { @@ -1380,8 +1379,9 @@ class HeAPPlacer void expand_regions() { std::queue overu_regions; + float beta = p->cfg.beta; for (auto &r : regions) { - if (!merged_regions.count(r.id) && r.overused()) + if (!merged_regions.count(r.id) && r.overused(beta)) overu_regions.push(r.id); } while (!overu_regions.empty()) { @@ -1390,33 +1390,33 @@ class HeAPPlacer if (merged_regions.count(rid)) continue; auto ® = regions.at(rid); - while (reg.overused()) { + while (reg.overused(beta)) { bool changed = false; // 2 x units for every 1 y unit to account for INT gaps between CLBs for (int j = 0; j < 2; j++) { if (reg.x0 > 0) { grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); changed = true; - if (!reg.overused()) + if (!reg.overused(beta)) break; } if (reg.x1 < p->max_x) { grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); changed = true; - if (!reg.overused()) + if (!reg.overused(beta)) break; } } if (reg.y0 > 0) { grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); changed = true; - if (!reg.overused()) + if (!reg.overused(beta)) break; } if (reg.y1 < p->max_y) { grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); changed = true; - if (!reg.overused()) + if (!reg.overused(beta)) break; } if (!changed) { @@ -1712,6 +1712,7 @@ bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg). PlacerHeapCfg::PlacerHeapCfg(Context *ctx) { alpha = ctx->setting("placerHeap/alpha", 0.1); + beta = ctx->setting("placerHeap/beta", 0.9); criticalityExponent = ctx->setting("placerHeap/criticalityExponent", 2); timingWeight = ctx->setting("placerHeap/timingWeight", 10); timing_driven = ctx->setting("timing_driven"); diff --git a/common/placer_heap.h b/common/placer_heap.h index 4bcf71e8..f2f78d51 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -35,7 +35,7 @@ struct PlacerHeapCfg { PlacerHeapCfg(Context *ctx); - float alpha; + float alpha, beta; float criticalityExponent; float timingWeight; bool timing_driven; -- cgit v1.2.3 From 1cb0e3af03affd2419f08e234c3a969dec68812a Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:52:28 +0000 Subject: HeAP: Add X and Y scaling factors for asymmetric arches Signed-off-by: David Shah --- common/placer_heap.cc | 42 ++++++++++++++++++++++++++---------------- common/placer_heap.h | 3 +++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index d7b92fb1..e89ce76a 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -686,7 +686,9 @@ class HeAPPlacer if (other == &port) return; int o_pos = cell_pos(other->cell); - double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); + double weight = 1.0 / (ni->users.size() * + std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * + std::abs(o_pos - this_pos))); if (user_idx != -1 && net_crit.count(ni->name)) { auto &nc = net_crit.at(ni->name); @@ -712,7 +714,9 @@ class HeAPPlacer int l_pos = legal_pos(solve_cells.at(row)); int c_pos = cell_pos(solve_cells.at(row)); - double weight = alpha * iter / std::max(1, std::abs(l_pos - c_pos)); + double weight = + alpha * iter / + std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(l_pos - c_pos)); // Add an arc from legalised to current position es.add_coeff(row, row, weight); es.add_rhs(row, weight * l_pos); @@ -763,7 +767,7 @@ class HeAPPlacer ymin = std::min(ymin, usrloc.y); ymax = std::max(ymax, usrloc.y); } - hpwl += (xmax - xmin) + (ymax - ymin); + hpwl += cfg.hpwl_scale_x * (xmax - xmin) + cfg.hpwl_scale_y * (ymax - ymin); } return hpwl; } @@ -1392,8 +1396,7 @@ class HeAPPlacer auto ® = regions.at(rid); while (reg.overused(beta)) { bool changed = false; - // 2 x units for every 1 y unit to account for INT gaps between CLBs - for (int j = 0; j < 2; j++) { + for (int j = 0; j < p->cfg.spread_scale_x; j++) { if (reg.x0 > 0) { grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); changed = true; @@ -1407,17 +1410,19 @@ class HeAPPlacer break; } } - if (reg.y0 > 0) { - grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); - changed = true; - if (!reg.overused(beta)) - break; - } - if (reg.y1 < p->max_y) { - grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); - changed = true; - if (!reg.overused(beta)) - break; + for (int j = 0; j < p->cfg.spread_scale_y; j++) { + if (reg.y0 > 0) { + grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); + changed = true; + if (!reg.overused(beta)) + break; + } + if (reg.y1 < p->max_y) { + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + changed = true; + if (!reg.overused(beta)) + break; + } } if (!changed) { for (auto bt : sorted(beltype)) { @@ -1718,6 +1723,11 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx) timing_driven = ctx->setting("timing_driven"); solverTolerance = 1e-5; placeAllAtOnce = false; + + hpwl_scale_x = 1; + hpwl_scale_y = 1; + spread_scale_x = 1; + spread_scale_y = 1; } NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.h b/common/placer_heap.h index f2f78d51..46c00503 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -42,6 +42,9 @@ struct PlacerHeapCfg float solverTolerance; bool placeAllAtOnce; + int hpwl_scale_x, hpwl_scale_y; + int spread_scale_x, spread_scale_y; + // These cell types will be randomly locked to prevent singular matrices std::unordered_set ioBufTypes; // These cell types are part of the same unit (e.g. slices split into -- cgit v1.2.3 From 9125698cb09b7f869599f47cfa9992d9712de41e Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 15:54:36 +0000 Subject: HeAP: backport out-of-range fix Signed-off-by: David Shah --- common/placer_heap.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index e89ce76a..c04e7091 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -594,11 +594,11 @@ class HeAPPlacer if (child->type == root->type) chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) - cell_locs[child->name].x = std::min(max_x, base.x + child->constr_x); + cell_locs[child->name].x = std::max(0, std::min(max_x, base.x + child->constr_x)); else cell_locs[child->name].x = base.x; // better handling of UNCONSTR? if (child->constr_y != child->UNCONSTR) - cell_locs[child->name].y = std::min(max_y, base.y + child->constr_y); + cell_locs[child->name].y = std::max(0, std::min(max_y, base.y + child->constr_y)); else cell_locs[child->name].y = base.y; // better handling of UNCONSTR? chain_root[child->name] = root; -- cgit v1.2.3 From 2c7d2f9e0c850590f306139a4eaa9135b0f2a254 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 2 Feb 2020 16:01:40 +0000 Subject: placer1: Add routeability optimisation (off by default) Signed-off-by: David Shah --- common/placer1.cc | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- common/placer1.h | 2 +- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 9c1eaba4..1ba50e28 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -259,6 +259,9 @@ class SAPlacer last_wirelen_cost = curr_wirelen_cost; last_timing_cost = curr_timing_cost; + if (cfg.netShareWeight > 0) + setup_nets_by_tile(); + wirelen_t avg_wirelen = curr_wirelen_cost; wirelen_t min_wirelen = curr_wirelen_cost; @@ -513,6 +516,11 @@ class SAPlacer if (other_cell != nullptr) old_dist += get_constraints_distance(ctx, other_cell); double delta = 0; + + int net_delta_score = 0; + if (cfg.netShareWeight > 0) + net_delta_score += update_nets_by_tile(cell, ctx->getBelLocation(cell->bel), ctx->getBelLocation(newBel)); + ctx->unbindBel(oldBel); if (other_cell != nullptr) { ctx->unbindBel(newBel); @@ -522,6 +530,9 @@ class SAPlacer if (other_cell != nullptr) { ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + net_delta_score += + update_nets_by_tile(other_cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); } add_move_cell(moveChange, cell, oldBel); @@ -546,6 +557,8 @@ class SAPlacer delta = lambda * (moveChange.timing_delta / std::max(last_timing_cost, epsilon)) + (1 - lambda) * (double(moveChange.wirelen_delta) / std::max(last_wirelen_cost, epsilon)); delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; + if (cfg.netShareWeight > 0) + delta += -cfg.netShareWeight * (net_delta_score / std::max(total_net_share, epsilon)); n_move++; // SA acceptance criterea if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { @@ -567,7 +580,11 @@ class SAPlacer ctx->bindBel(oldBel, cell, STRENGTH_WEAK); if (other_cell != nullptr) { ctx->bindBel(newBel, other_cell, STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + update_nets_by_tile(other_cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel)); } + if (cfg.netShareWeight > 0) + update_nets_by_tile(cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); return false; } @@ -588,8 +605,13 @@ class SAPlacer ctx->unbindBel(newBel); ctx->unbindBel(oldBel); ctx->bindBel(newBel, cell, is_constrained(cell) ? STRENGTH_STRONG : STRENGTH_WEAK); - if (bound != nullptr) + if (bound != nullptr) { ctx->bindBel(oldBel, bound, is_constrained(bound) ? STRENGTH_STRONG : STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + update_nets_by_tile(bound, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); + } + if (cfg.netShareWeight > 0) + update_nets_by_tile(cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel)); return oldBel; } @@ -611,6 +633,7 @@ class SAPlacer std::vector> moves_made; std::vector> dest_bels; double delta = 0; + int orig_share_cost = total_net_share; moveChange.reset(this); #if 0 if (ctx->debug) @@ -663,6 +686,10 @@ class SAPlacer compute_cost_changes(moveChange); delta = lambda * (moveChange.timing_delta / last_timing_cost) + (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); + if (cfg.netShareWeight > 0) { + delta += + cfg.netShareWeight * (orig_share_cost - total_net_share) / std::max(total_net_share, 1e-20); + } n_move++; // SA acceptance criterea if (delta < 0 || (temp > 1e-9 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { @@ -1105,8 +1132,66 @@ class SAPlacer } } + // Simple routeability driven placement + const int large_cell_thresh = 50; + int total_net_share = 0; + std::vector>> nets_by_tile; + void setup_nets_by_tile() + { + total_net_share = 0; + nets_by_tile.resize(max_x + 1, std::vector>(max_y + 1)); + for (auto cell : sorted(ctx->cells)) { + CellInfo *ci = cell.second; + if (int(ci->ports.size()) > large_cell_thresh) + continue; + Loc loc = ctx->getBelLocation(ci->bel); + auto &nbt = nets_by_tile.at(loc.x).at(loc.y); + for (const auto &port : ci->ports) { + if (port.second.net == nullptr) + continue; + if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel)) + continue; + int &s = nbt[port.second.net->name]; + if (s > 0) + ++total_net_share; + ++s; + } + } + } + + int update_nets_by_tile(CellInfo *ci, Loc old_loc, Loc new_loc) + { + if (int(ci->ports.size()) > large_cell_thresh) + return 0; + int loss = 0, gain = 0; + auto &nbt_old = nets_by_tile.at(old_loc.x).at(old_loc.y); + auto &nbt_new = nets_by_tile.at(new_loc.x).at(new_loc.y); + + for (const auto &port : ci->ports) { + if (port.second.net == nullptr) + continue; + if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel)) + continue; + int &o = nbt_old[port.second.net->name]; + --o; + NPNR_ASSERT(o >= 0); + if (o > 0) + ++loss; + int &n = nbt_new[port.second.net->name]; + if (n > 0) + ++gain; + ++n; + } + int delta = gain - loss; + total_net_share += delta; + return delta; + } + // Get the combined wirelen/timing metric - inline double curr_metric() { return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost; } + inline double curr_metric() + { + return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost - cfg.netShareWeight * total_net_share; + } // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds std::vector net_bounds; @@ -1144,6 +1229,7 @@ class SAPlacer Placer1Cfg::Placer1Cfg(Context *ctx) { constraintWeight = ctx->setting("placer1/constraintWeight", 10); + netShareWeight = ctx->setting("placer1/netShareWeight", 0); minBelsForGridPick = ctx->setting("placer1/minBelsForGridPick", 64); budgetBased = ctx->setting("placer1/budgetBased", false); startTemp = ctx->setting("placer1/startTemp", 1); diff --git a/common/placer1.h b/common/placer1.h index e803d592..cf66151c 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -27,7 +27,7 @@ NEXTPNR_NAMESPACE_BEGIN struct Placer1Cfg { Placer1Cfg(Context *ctx); - float constraintWeight; + float constraintWeight, netShareWeight; int minBelsForGridPick; bool budgetBased; float startTemp; -- cgit v1.2.3