diff options
author | gatecat <gatecat@ds0.me> | 2021-03-09 08:48:12 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-09 08:48:12 +0000 |
commit | 326b34887cdf82dc834382f4bf35d120bd4173dd (patch) | |
tree | bb346955c661bf96b51e309a3f1030c27ac1bb07 | |
parent | 0f17e80eef2a4c0e417b65efa559481f83831f00 (diff) | |
parent | 8a4bf3a7805080db2c8f2e797a0f12aad7c99f5d (diff) | |
download | nextpnr-326b34887cdf82dc834382f4bf35d120bd4173dd.tar.gz nextpnr-326b34887cdf82dc834382f4bf35d120bd4173dd.tar.bz2 nextpnr-326b34887cdf82dc834382f4bf35d120bd4173dd.zip |
Merge pull request #609 from YosysHQ/gatecat/sta-v2
Use new timing engine for criticality
-rw-r--r-- | common/nextpnr.h | 5 | ||||
-rw-r--r-- | common/placer1.cc | 18 | ||||
-rw-r--r-- | common/placer_heap.cc | 19 | ||||
-rw-r--r-- | common/router2.cc | 19 | ||||
-rw-r--r-- | common/timing.cc | 706 | ||||
-rw-r--r-- | common/timing.h | 258 | ||||
-rw-r--r-- | common/timing_opt.cc | 62 | ||||
-rw-r--r-- | common/util.h | 92 | ||||
-rw-r--r-- | ecp5/arch_place.cc | 12 | ||||
-rw-r--r-- | nexus/post_place.cc | 6 |
10 files changed, 943 insertions, 254 deletions
diff --git a/common/nextpnr.h b/common/nextpnr.h index ab2f8dca..404900c4 100644 --- a/common/nextpnr.h +++ b/common/nextpnr.h @@ -552,6 +552,10 @@ struct DelayPair { return {min_delay + other.min_delay, max_delay + other.max_delay}; } + DelayPair operator-(const DelayPair &other) const + { + return {min_delay - other.min_delay, max_delay - other.max_delay}; + } }; // four-quadrant, min and max rise and fall delay @@ -575,6 +579,7 @@ struct DelayQuad DelayPair delayPair() const { return DelayPair(minDelay(), maxDelay()); }; DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; } + DelayQuad operator-(const DelayQuad &other) const { return {rise - other.rise, fall - other.fall}; } }; struct ClockConstraint; diff --git a/common/placer1.cc b/common/placer1.cc index d57a841a..c8b0d385 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -78,7 +78,7 @@ class SAPlacer public: SAPlacer(Context *ctx, Placer1Cfg cfg) - : ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg) + : ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg), tmg(ctx) { for (auto bel : ctx->getBels()) { Loc loc = ctx->getBelLocation(bel); @@ -241,8 +241,9 @@ class SAPlacer auto saplace_start = std::chrono::high_resolution_clock::now(); // Invoke timing analysis to obtain criticalities + tmg.setup_only = true; if (!cfg.budgetBased) - get_criticalities(ctx, &net_crit); + tmg.setup(); // Calculate costs after initial placement setup_costs(); @@ -379,7 +380,7 @@ class SAPlacer // Invoke timing analysis to obtain criticalities if (!cfg.budgetBased && cfg.timing_driven) - get_criticalities(ctx, &net_crit); + tmg.run(); // Need to rebuild costs after criticalities change setup_costs(); // Reset incremental bounds @@ -836,11 +837,9 @@ class SAPlacer double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); return std::min(10.0, std::exp(delay - 
ctx->getDelayNS(net->users.at(user).budget) / 10)); } else { - auto crit = net_crit.find(net->name); - if (crit == net_crit.end() || crit->second.criticality.empty()) - return 0; + float crit = tmg.get_criticality(CellPortKey(net->users.at(user))); double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); - return delay * std::pow(crit->second.criticality.at(user), crit_exp); + return delay * std::pow(crit, crit_exp); } } @@ -1216,9 +1215,6 @@ class SAPlacer wirelen_t last_wirelen_cost, curr_wirelen_cost; double last_timing_cost, curr_timing_cost; - // Criticality data from timing analysis - NetCriticalityMap net_crit; - Context *ctx; float temp = 10; float crit_exp = 8; @@ -1235,6 +1231,8 @@ class SAPlacer bool require_legal = true; const int legalise_dia = 4; Placer1Cfg cfg; + + TimingAnalyser tmg; }; Placer1Cfg::Placer1Cfg(Context *ctx) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index eb931a37..3ee8503c 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -139,9 +139,12 @@ template <typename T> struct EquationSystem class HeAPPlacer { public: - HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1) + HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) + : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx) { Eigen::initParallel(); + tmg.setup_only = true; + tmg.setup(); } bool place() @@ -269,7 +272,7 @@ class HeAPPlacer // Update timing weights if (cfg.timing_driven) - get_criticalities(ctx, &net_crit); + tmg.run(); if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; @@ -355,6 +358,8 @@ class HeAPPlacer FastBels fast_bels; std::unordered_map<IdString, std::tuple<int, int>> bel_types; + TimingAnalyser tmg; + struct BoundingBox { // Actual bounding box @@ -392,8 +397,6 @@ class HeAPPlacer // Performance counting double solve_time = 0, cl_time = 0, sl_time = 0; - NetCriticalityMap net_crit; - // Place cells with the BEL attribute set to 
constrain them void place_constraints() { @@ -736,11 +739,9 @@ class HeAPPlacer std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(o_pos - this_pos))); - if (user_idx != -1 && net_crit.count(ni->name)) { - auto &nc = net_crit.at(ni->name); - if (user_idx < int(nc.criticality.size())) - weight *= (1.0 + cfg.timingWeight * - std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent)); + if (user_idx != -1) { + weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)), + cfg.criticalityExponent)); } // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, diff --git a/common/router2.cc b/common/router2.cc index 9fca1f2a..35042f14 100644 --- a/common/router2.cc +++ b/common/router2.cc @@ -112,16 +112,14 @@ struct Router2 Context *ctx; Router2Cfg cfg; - Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg) {} + Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) { tmg.setup(); } // Use 'udata' for fast net lookups and indexing std::vector<NetInfo *> nets_by_udata; std::vector<PerNetData> nets; bool timing_driven; - - // Criticality data from timing analysis - NetCriticalityMap net_crit; + TimingAnalyser tmg; void setup_nets() { @@ -1175,18 +1173,13 @@ struct Router2 if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 50))) { // Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few // congested nodes - get_criticalities(ctx, &net_crit); + tmg.run(); for (auto n : route_queue) { - IdString name = nets_by_udata.at(n)->name; - auto fnd = net_crit.find(name); + NetInfo *ni = nets_by_udata.at(n); auto &net = nets.at(n); net.max_crit = 0; - if (fnd == net_crit.end()) - continue; - for (int i = 0; i < int(fnd->second.criticality.size()); i++) { - float c = fnd->second.criticality.at(i); - for (auto &a : net.arcs.at(i)) - a.arc_crit = c; + for (auto &usr : ni->users) { + float c = 
tmg.get_criticality(CellPortKey(usr)); net.max_crit = std::max(net.max_crit, c); } } diff --git a/common/timing.cc b/common/timing.cc index a61c0beb..8229f143 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -30,6 +30,547 @@ NEXTPNR_NAMESPACE_BEGIN +void TimingAnalyser::setup() +{ + init_ports(); + get_cell_delays(); + topo_sort(); + setup_port_domains(); + run(); +} + +void TimingAnalyser::run() +{ + reset_times(); + get_route_delays(); + walk_forward(); + walk_backward(); + compute_slack(); + compute_criticality(); +} + +void TimingAnalyser::init_ports() +{ + // Per cell port structures + for (auto cell : sorted(ctx->cells)) { + CellInfo *ci = cell.second; + for (auto port : sorted_ref(ci->ports)) { + auto &data = ports[CellPortKey(ci->name, port.first)]; + data.type = port.second.type; + data.cell_port = CellPortKey(ci->name, port.first); + } + } + // Cell port to net port mapping + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + if (ni->driver.cell != nullptr) + ports[CellPortKey(ni->driver)].net_port = NetPortKey(ni->name); + for (size_t i = 0; i < ni->users.size(); i++) + ports[CellPortKey(ni->users.at(i))].net_port = NetPortKey(ni->name, i); + } +} + +void TimingAnalyser::get_cell_delays() +{ + for (auto &port : ports) { + CellInfo *ci = cell_info(port.first); + auto &pi = port_info(port.first); + auto &pd = port.second; + + IdString name = port.first.port; + // Ignore dangling ports altogether for timing purposes + if (pd.net_port.net == IdString()) + continue; + pd.cell_arcs.clear(); + int clkInfoCount = 0; + TimingPortClass cls = ctx->getPortTimingClass(ci, name, clkInfoCount); + if (cls == TMG_STARTPOINT || cls == TMG_ENDPOINT || cls == TMG_CLOCK_INPUT || cls == TMG_GEN_CLOCK || + cls == TMG_IGNORE) + continue; + if (pi.type == PORT_IN) { + // Input ports might have setup/hold relationships + if (cls == TMG_REGISTER_INPUT) { + for (int i = 0; i < clkInfoCount; i++) { + auto info = ctx->getPortClockingInfo(ci, name, i); + if 
(!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr) + continue; + pd.cell_arcs.emplace_back(CellArc::SETUP, info.clock_port, DelayQuad(info.setup, info.setup), + info.edge); + pd.cell_arcs.emplace_back(CellArc::HOLD, info.clock_port, DelayQuad(info.hold, info.hold), + info.edge); + } + } + // Combinational delays through cell + for (auto &other_port : ci->ports) { + auto &op = other_port.second; + // ignore dangling ports and non-outputs + if (op.net == nullptr || op.type != PORT_OUT) + continue; + DelayQuad delay; + bool is_path = ctx->getCellDelay(ci, name, other_port.first, delay); + if (is_path) + pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay); + } + } else if (pi.type == PORT_OUT) { + // Output ports might have clk-to-q relationships + if (cls == TMG_REGISTER_OUTPUT) { + for (int i = 0; i < clkInfoCount; i++) { + auto info = ctx->getPortClockingInfo(ci, name, i); + if (!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr) + continue; + pd.cell_arcs.emplace_back(CellArc::CLK_TO_Q, info.clock_port, info.clockToQ, info.edge); + } + } + // Combinational delays through cell + for (auto &other_port : ci->ports) { + auto &op = other_port.second; + // ignore dangling ports and non-inputs + if (op.net == nullptr || op.type != PORT_IN) + continue; + DelayQuad delay; + bool is_path = ctx->getCellDelay(ci, other_port.first, name, delay); + if (is_path) + pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay); + } + } + } +} + +void TimingAnalyser::get_route_delays() +{ + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + if (ni->driver.cell == nullptr || ni->driver.cell->bel == BelId()) + continue; + for (auto &usr : ni->users) { + if (usr.cell->bel == BelId()) + continue; + ports.at(CellPortKey(usr)).route_delay = DelayPair(ctx->getNetinfoRouteDelay(ni, usr)); + } + } +} + +void TimingAnalyser::topo_sort() +{ + TopoSort<CellPortKey> topo; + for 
(auto &port : ports) { + auto &pd = port.second; + // All ports are nodes + topo.node(port.first); + if (pd.type == PORT_IN) { + // inputs: combinational arcs through the cell are edges + for (auto &arc : pd.cell_arcs) { + if (arc.type != CellArc::COMBINATIONAL) + continue; + topo.edge(port.first, CellPortKey(port.first.cell, arc.other_port)); + } + } else if (pd.type == PORT_OUT) { + // output: routing arcs are edges + const NetInfo *pn = port_info(port.first).net; + if (pn != nullptr) { + for (auto &usr : pn->users) + topo.edge(port.first, CellPortKey(usr)); + } + } + } + bool no_loops = topo.sort(); + if (!no_loops && verbose_mode) { + log_info("Found %d combinational loops:\n", int(topo.loops.size())); + int i = 0; + for (auto &loop : topo.loops) { + log_info(" loop %d:\n", ++i); + for (auto &port : loop) { + log_info(" %s.%s (%s)\n", ctx->nameOf(port.cell), ctx->nameOf(port.port), + ctx->nameOf(port_info(port).net)); + } + } + } + std::swap(topological_order, topo.sorted); +} + +void TimingAnalyser::setup_port_domains() +{ + for (auto &d : domains) { + d.startpoints.clear(); + d.endpoints.clear(); + } + // Go forward through the topological order (domains from the PoV of arrival time) + for (auto port : topological_order) { + auto &pd = ports.at(port); + auto &pi = port_info(port); + if (pi.type == PORT_OUT) { + for (auto &fanin : pd.cell_arcs) { + if (fanin.type != CellArc::CLK_TO_Q) + continue; + // registered outputs are startpoints + auto dom = domain_id(port.cell, fanin.other_port, fanin.edge); + // create per-domain data + pd.arrival[dom]; + domains.at(dom).startpoints.emplace_back(port, fanin.other_port); + } + // copy domains across routing + if (pi.net != nullptr) + for (auto &usr : pi.net->users) + copy_domains(port, CellPortKey(usr), false); + } else { + // copy domains from input to output + for (auto &fanout : pd.cell_arcs) { + if (fanout.type != CellArc::COMBINATIONAL) + continue; + copy_domains(port, CellPortKey(port.cell, fanout.other_port), 
false); + } + } + } + // Go backward through the topological order (domains from the PoV of required time) + for (auto port : reversed_range(topological_order)) { + auto &pd = ports.at(port); + auto &pi = port_info(port); + if (pi.type == PORT_OUT) { + // copy domains from output to input + for (auto &fanin : pd.cell_arcs) { + if (fanin.type != CellArc::COMBINATIONAL) + continue; + copy_domains(port, CellPortKey(port.cell, fanin.other_port), true); + } + } else { + for (auto &fanout : pd.cell_arcs) { + if (fanout.type != CellArc::SETUP) + continue; + // registered inputs are startpoints + auto dom = domain_id(port.cell, fanout.other_port, fanout.edge); + // create per-domain data + pd.required[dom]; + domains.at(dom).endpoints.emplace_back(port, fanout.other_port); + } + // copy port to driver + if (pi.net != nullptr && pi.net->driver.cell != nullptr) + copy_domains(port, CellPortKey(pi.net->driver), true); + } + } + // Iterate over ports and find domain paris + for (auto port : topological_order) { + auto &pd = ports.at(port); + for (auto &arr : pd.arrival) + for (auto &req : pd.required) { + pd.domain_pairs[domain_pair_id(arr.first, req.first)]; + } + } +} + +void TimingAnalyser::reset_times() +{ + for (auto &port : ports) { + auto do_reset = [&](std::unordered_map<domain_id_t, ArrivReqTime> &times) { + for (auto &t : times) { + t.second.value = init_delay; + t.second.path_length = 0; + t.second.bwd_min = CellPortKey(); + t.second.bwd_max = CellPortKey(); + } + }; + do_reset(port.second.arrival); + do_reset(port.second.required); + for (auto &dp : port.second.domain_pairs) { + dp.second.setup_slack = std::numeric_limits<delay_t>::max(); + dp.second.hold_slack = std::numeric_limits<delay_t>::max(); + dp.second.max_path_length = 0; + dp.second.criticality = 0; + dp.second.budget = 0; + } + port.second.worst_crit = 0; + port.second.worst_setup_slack = std::numeric_limits<delay_t>::max(); + port.second.worst_hold_slack = std::numeric_limits<delay_t>::max(); + } +} + +void
TimingAnalyser::set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length, + CellPortKey prev) +{ + auto &arr = ports.at(target).arrival.at(domain); + if (arrival.max_delay > arr.value.max_delay) { + arr.value.max_delay = arrival.max_delay; + arr.bwd_max = prev; + } + if (!setup_only && (arrival.min_delay < arr.value.min_delay)) { + arr.value.min_delay = arrival.min_delay; + arr.bwd_min = prev; + } + arr.path_length = std::max(arr.path_length, path_length); +} + +void TimingAnalyser::set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length, + CellPortKey prev) +{ + auto &req = ports.at(target).required.at(domain); + if (required.min_delay < req.value.min_delay) { + req.value.min_delay = required.min_delay; + req.bwd_min = prev; + } + if (!setup_only && (required.max_delay > req.value.max_delay)) { + req.value.max_delay = required.max_delay; + req.bwd_max = prev; + } + req.path_length = std::max(req.path_length, path_length); +} + +void TimingAnalyser::walk_forward() +{ + // Assign initial arrival time to domain startpoints + for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) { + auto &dom = domains.at(dom_id); + for (auto &sp : dom.startpoints) { + auto &pd = ports.at(sp.first); + DelayPair init_arrival(0); + CellPortKey clock_key; + // TODO: clock routing delay, if analysis of that is enabled + if (sp.second != IdString()) { + // clocked startpoints have a clock-to-out time + for (auto &fanin : pd.cell_arcs) { + if (fanin.type == CellArc::CLK_TO_Q && fanin.other_port == sp.second) { + init_arrival = init_arrival + fanin.value.delayPair(); + break; + } + } + clock_key = CellPortKey(sp.first.cell, sp.second); + } + set_arrival_time(sp.first, dom_id, init_arrival, 1, clock_key); + } + } + // Walk forward in topological order + for (auto p : topological_order) { + auto &pd = ports.at(p); + for (auto &arr : pd.arrival) { + if (pd.type == PORT_OUT) { + // Output port: 
propagate delay through net, adding route delay + NetInfo *net = port_info(p).net; + if (net != nullptr) + for (auto &usr : net->users) { + CellPortKey usr_key(usr); + auto &usr_pd = ports.at(usr_key); + set_arrival_time(usr_key, arr.first, arr.second.value + usr_pd.route_delay, + arr.second.path_length, p); + } + } else if (pd.type == PORT_IN) { + // Input port; propagate delay through cell, adding combinational delay + for (auto &fanout : pd.cell_arcs) { + if (fanout.type != CellArc::COMBINATIONAL) + continue; + set_arrival_time(CellPortKey(p.cell, fanout.other_port), arr.first, + arr.second.value + fanout.value.delayPair(), arr.second.path_length + 1, p); + } + } + } + } +} + +void TimingAnalyser::walk_backward() +{ + // Assign initial required time to domain endpoints + // Note that clock frequency will be considered later in the analysis for, for now all required times are normalised + // to 0ns + for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) { + auto &dom = domains.at(dom_id); + for (auto &ep : dom.endpoints) { + auto &pd = ports.at(ep.first); + DelayPair init_setuphold(0); + CellPortKey clock_key; + // TODO: clock routing delay, if analysis of that is enabled + if (ep.second != IdString()) { + // Add setup/hold time, if this endpoint is clocked + for (auto &fanin : pd.cell_arcs) { + if (fanin.type == CellArc::SETUP && fanin.other_port == ep.second) + init_setuphold.min_delay -= fanin.value.maxDelay(); + if (fanin.type == CellArc::HOLD && fanin.other_port == ep.second) + init_setuphold.max_delay -= fanin.value.maxDelay(); + } + clock_key = CellPortKey(ep.first.cell, ep.second); + } + set_required_time(ep.first, dom_id, init_setuphold, 1, clock_key); + } + } + // Walk backwards in topological order + for (auto p : reversed_range(topological_order)) { + auto &pd = ports.at(p); + for (auto &req : pd.required) { + if (pd.type == PORT_IN) { + // Input port: propagate delay back through net, subtracting route delay + NetInfo *net = 
port_info(p).net; + if (net != nullptr && net->driver.cell != nullptr) + set_required_time(CellPortKey(net->driver), req.first, req.second.value - pd.route_delay, + req.second.path_length, p); + } else if (pd.type == PORT_OUT) { + // Output port : propagate delay back through cell, subtracting combinational delay + for (auto &fanin : pd.cell_arcs) { + if (fanin.type != CellArc::COMBINATIONAL) + continue; + set_required_time(CellPortKey(p.cell, fanin.other_port), req.first, + req.second.value - fanin.value.delayPair(), req.second.path_length + 1, p); + } + } + } + } +} + +void TimingAnalyser::print_fmax() +{ + // Temporary testing code for comparison only + std::unordered_map<int, double> domain_fmax; + for (auto p : topological_order) { + auto &pd = ports.at(p); + for (auto &req : pd.required) { + if (pd.arrival.count(req.first)) { + auto &arr = pd.arrival.at(req.first); + double fmax = 1000.0 / ctx->getDelayNS(arr.value.maxDelay() - req.second.value.minDelay()); + if (!domain_fmax.count(req.first) || domain_fmax.at(req.first) > fmax) + domain_fmax[req.first] = fmax; + } + } + } + for (auto &fm : domain_fmax) { + log_info("Domain %s Worst Fmax %.02f\n", ctx->nameOf(domains.at(fm.first).key.clock), fm.second); + } +} + +void TimingAnalyser::compute_slack() +{ + for (auto &dp : domain_pairs) { + dp.worst_setup_slack = std::numeric_limits<delay_t>::max(); + dp.worst_hold_slack = std::numeric_limits<delay_t>::max(); + } + for (auto p : topological_order) { + auto &pd = ports.at(p); + for (auto &pdp : pd.domain_pairs) { + auto &dp = domain_pairs.at(pdp.first); + auto &arr = pd.arrival.at(dp.key.launch); + auto &req = pd.required.at(dp.key.capture); + pdp.second.setup_slack = dp.period.minDelay() - (arr.value.maxDelay() - req.value.minDelay()); + if (!setup_only) + pdp.second.hold_slack = arr.value.minDelay() - req.value.maxDelay(); + pdp.second.max_path_length = arr.path_length + req.path_length; + pd.worst_setup_slack = std::min(pd.worst_setup_slack, 
pdp.second.setup_slack); + dp.worst_setup_slack = std::min(dp.worst_setup_slack, pdp.second.setup_slack); + if (!setup_only) { + pd.worst_hold_slack = std::min(pd.worst_hold_slack, pdp.second.hold_slack); + dp.worst_hold_slack = std::min(dp.worst_hold_slack, pdp.second.hold_slack); + } + } + } +} + +void TimingAnalyser::compute_criticality() +{ + for (auto p : topological_order) { + auto &pd = ports.at(p); + for (auto &pdp : pd.domain_pairs) { + auto &dp = domain_pairs.at(pdp.first); + float crit = + 1.0f - (float(pdp.second.setup_slack) - float(dp.worst_setup_slack)) / float(-dp.worst_setup_slack); + crit = std::min(crit, 1.0f); + crit = std::max(crit, 0.0f); + pdp.second.criticality = crit; + pd.worst_crit = std::max(pd.worst_crit, crit); + } + } +} + +std::vector<CellPortKey> TimingAnalyser::get_failing_eps(domain_id_t domain_pair, int count) +{ + std::vector<CellPortKey> failing_eps; + delay_t last_slack = std::numeric_limits<delay_t>::min(); + auto &dp = domain_pairs.at(domain_pair); + auto &cap_d = domains.at(dp.key.capture); + while (int(failing_eps.size()) < count) { + CellPortKey next; + delay_t next_slack = std::numeric_limits<delay_t>::max(); + for (auto ep : cap_d.endpoints) { + auto &pd = ports.at(ep.first); + if (!pd.domain_pairs.count(domain_pair)) + continue; + delay_t ep_slack = pd.domain_pairs.at(domain_pair).setup_slack; + if (ep_slack < next_slack && ep_slack > last_slack) { + next = ep.first; + next_slack = ep_slack; + } + } + if (next == CellPortKey()) + break; + failing_eps.push_back(next); + last_slack = next_slack; + } + return failing_eps; +} + +void TimingAnalyser::print_critical_path(CellPortKey endpoint, domain_id_t domain_pair) +{ + CellPortKey cursor = endpoint; + auto &dp = domain_pairs.at(domain_pair); + log(" endpoint %s.%s (slack %.02fns):\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port), + ctx->getDelayNS(ports.at(cursor).domain_pairs.at(domain_pair).setup_slack)); + while (cursor != CellPortKey()) { + log(" %s.%s (net 
%s)\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port), + ctx->nameOf(get_net_or_empty(ctx->cells.at(cursor.cell).get(), cursor.port))); + if (!ports.at(cursor).arrival.count(dp.key.launch)) + break; + cursor = ports.at(cursor).arrival.at(dp.key.launch).bwd_max; + } +} + +namespace { +const char *edge_name(ClockEdge edge) { return (edge == FALLING_EDGE) ? "negedge" : "posedge"; } +} // namespace + +void TimingAnalyser::print_report() +{ + for (int i = 0; i < int(domain_pairs.size()); i++) { + auto &dp = domain_pairs.at(i); + auto &launch = domains.at(dp.key.launch); + auto &capture = domains.at(dp.key.capture); + log("Worst endpoints for %s %s -> %s %s\n", edge_name(launch.key.edge), ctx->nameOf(launch.key.clock), + edge_name(capture.key.edge), ctx->nameOf(capture.key.clock)); + auto failing_eps = get_failing_eps(i, 5); + for (auto &ep : failing_eps) + print_critical_path(ep, i); + log_break(); + } +} + +domain_id_t TimingAnalyser::domain_id(IdString cell, IdString clock_port, ClockEdge edge) +{ + return domain_id(ctx->cells.at(cell)->ports.at(clock_port).net, edge); +} +domain_id_t TimingAnalyser::domain_id(const NetInfo *net, ClockEdge edge) +{ + NPNR_ASSERT(net != nullptr); + ClockDomainKey key{net->name, edge}; + auto inserted = domain_to_id.emplace(key, domains.size()); + if (inserted.second) { + domains.emplace_back(key); + } + return inserted.first->second; +} +domain_id_t TimingAnalyser::domain_pair_id(domain_id_t launch, domain_id_t capture) +{ + ClockDomainPairKey key{launch, capture}; + auto inserted = pair_to_id.emplace(key, domain_pairs.size()); + if (inserted.second) { + domain_pairs.emplace_back(key); + } + return inserted.first->second; +} + +void TimingAnalyser::copy_domains(const CellPortKey &from, const CellPortKey &to, bool backward) +{ + auto &f = ports.at(from), &t = ports.at(to); + for (auto &dom : (backward ? f.required : f.arrival)) + (backward ? 
t.required : t.arrival)[dom.first]; +} + +CellInfo *TimingAnalyser::cell_info(const CellPortKey &key) { return ctx->cells.at(key.cell).get(); } + +PortInfo &TimingAnalyser::port_info(const CellPortKey &key) { return ctx->cells.at(key.cell)->ports.at(key.port); } + +/** LEGACY CODE BEGIN **/ + namespace { struct ClockEvent { @@ -86,7 +627,6 @@ struct CriticalPath }; typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap; -typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap; struct Timing { @@ -96,7 +636,6 @@ struct Timing delay_t min_slack; CriticalPathMap *crit_path; DelayFrequency *slack_histogram; - NetCriticalityMap *net_crit; IdString async_clock; struct TimingData @@ -112,10 +651,9 @@ struct Timing }; Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr, - DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr) + DelayFrequency *slack_histogram = nullptr) : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")), - crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit), - async_clock(ctx->id("$async$")) + crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$")) { } @@ -496,156 +1034,6 @@ struct Timing std::reverse(cp_ports.begin(), cp_ports.end()); } } - - if (net_crit) { - NPNR_ASSERT(crit_path); - // Go through in reverse topological order to set required times - for (auto net : boost::adaptors::reverse(topological_order)) { - if (!net_data.count(net)) - continue; - auto &nd_map = net_data.at(net); - for (auto &startdomain : nd_map) { - auto &nd = startdomain.second; - if (nd.false_startpoint) - continue; - if (startdomain.first.clock == async_clock) - continue; - if (nd.min_required.empty()) - nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max()); - delay_t net_min_required = std::numeric_limits<delay_t>::max(); - for (size_t i = 0; i < 
net->users.size(); i++) { - auto &usr = net->users.at(i); - auto net_delay = ctx->getNetinfoRouteDelay(net, usr); - int port_clocks; - TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks); - if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) { - auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) { - delay_t period; - // Set default period - if (edge == startdomain.first.edge) { - period = clk_period; - } else { - period = clk_period / 2; - } - if (clksig != async_clock) { - if (ctx->nets.at(clksig)->clkconstr) { - if (edge == startdomain.first.edge) { - // same edge - period = ctx->nets.at(clksig)->clkconstr->period.minDelay(); - } else if (edge == RISING_EDGE) { - // falling -> rising - period = ctx->nets.at(clksig)->clkconstr->low.minDelay(); - } else if (edge == FALLING_EDGE) { - // rising -> falling - period = ctx->nets.at(clksig)->clkconstr->high.minDelay(); - } - } - } - nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i)); - }; - if (portClass == TMG_REGISTER_INPUT) { - for (int j = 0; j < port_clocks; j++) { - TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j); - const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port); - IdString clksig = clknet ? clknet->name : async_clock; - process_endpoint(clksig, clknet ? 
clkInfo.edge : RISING_EDGE, - clkInfo.setup.maxDelay()); - } - } else { - process_endpoint(async_clock, RISING_EDGE, 0); - } - } - net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay); - } - PortRef &drv = net->driver; - if (drv.cell == nullptr) - continue; - for (const auto &port : drv.cell->ports) { - if (port.second.type != PORT_IN || !port.second.net) - continue; - DelayQuad comb_delay; - bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay); - if (!is_path) - continue; - int cc; - auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc); - if (pclass != TMG_COMB_INPUT) - continue; - NetInfo *sink_net = port.second.net; - if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) { - auto &sink_nd = net_data.at(sink_net).at(startdomain.first); - if (sink_nd.min_required.empty()) - sink_nd.min_required.resize(sink_net->users.size(), - std::numeric_limits<delay_t>::max()); - for (size_t i = 0; i < sink_net->users.size(); i++) { - auto &user = sink_net->users.at(i); - if (user.cell == drv.cell && user.port == port.first) { - sink_nd.min_required.at(i) = std::min(sink_nd.min_required.at(i), - net_min_required - comb_delay.maxDelay()); - break; - } - } - } - } - } - } - std::unordered_map<ClockEvent, delay_t> worst_slack; - - // Assign slack values - for (auto &net_entry : net_data) { - const NetInfo *net = net_entry.first; - for (auto &startdomain : net_entry.second) { - auto &nd = startdomain.second; - if (startdomain.first.clock == async_clock) - continue; - if (nd.min_required.empty()) - continue; - auto &nc = (*net_crit)[net->name]; - if (nc.slack.empty()) - nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max()); - - for (size_t i = 0; i < net->users.size(); i++) { - delay_t slack = nd.min_required.at(i) - - (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); - - if (worst_slack.count(startdomain.first)) - worst_slack.at(startdomain.first) = 
std::min(worst_slack.at(startdomain.first), slack); - else - worst_slack[startdomain.first] = slack; - nc.slack.at(i) = slack; - } - if (ctx->debug) - log_break(); - } - } - // Assign criticality values - for (auto &net_entry : net_data) { - const NetInfo *net = net_entry.first; - for (auto &startdomain : net_entry.second) { - if (startdomain.first.clock == async_clock) - continue; - auto &nd = startdomain.second; - if (nd.min_required.empty()) - continue; - auto &nc = (*net_crit)[net->name]; - if (nc.slack.empty()) - continue; - if (nc.criticality.empty()) - nc.criticality.resize(net->users.size(), 0); - // Only consider intra-clock paths for criticality - if (!crit_path->count(ClockPair{startdomain.first, startdomain.first})) - continue; - delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; - for (size_t i = 0; i < net->users.size(); i++) { - float criticality = - 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); - nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality)); - } - nc.max_path_length = nd.max_path_length; - nc.cd_worst_slack = worst_slack.at(startdomain.first); - } - } - } return min_slack; } @@ -999,12 +1387,4 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p } } -void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) -{ - CriticalPathMap crit_paths; - net_crit->clear(); - Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit); - timing.walk_paths(); -} - NEXTPNR_NAMESPACE_END diff --git a/common/timing.h b/common/timing.h index f1d18e8a..63c0fc74 100644 --- a/common/timing.h +++ b/common/timing.h @@ -24,6 +24,251 @@ NEXTPNR_NAMESPACE_BEGIN +struct CellPortKey +{ + CellPortKey(){}; + CellPortKey(IdString cell, IdString port) : cell(cell), port(port){}; + explicit CellPortKey(const PortRef &pr) + { + NPNR_ASSERT(pr.cell != nullptr); + cell = pr.cell->name; + port = pr.port; + } + IdString cell, port; + 
struct Hash + { + inline std::size_t operator()(const CellPortKey &arg) const noexcept + { + std::size_t seed = std::hash<IdString>()(arg.cell); + seed ^= std::hash<IdString>()(arg.port) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; + inline bool operator==(const CellPortKey &other) const { return (cell == other.cell) && (port == other.port); } + inline bool operator!=(const CellPortKey &other) const { return (cell != other.cell) || (port != other.port); } + inline bool operator<(const CellPortKey &other) const + { + return cell == other.cell ? port < other.port : cell < other.cell; + } +}; + +struct NetPortKey +{ + IdString net; + size_t idx; + NetPortKey(){}; + explicit NetPortKey(IdString net) : net(net), idx(DRIVER_IDX){}; // driver + explicit NetPortKey(IdString net, size_t user) : net(net), idx(user){}; // user + + static const size_t DRIVER_IDX = std::numeric_limits<size_t>::max(); + + inline bool is_driver() const { return (idx == DRIVER_IDX); } + inline size_t user_idx() const + { + NPNR_ASSERT(idx != DRIVER_IDX); + return idx; + } + + struct Hash + { + std::size_t operator()(const NetPortKey &arg) const noexcept + { + std::size_t seed = std::hash<IdString>()(arg.net); + seed ^= std::hash<size_t>()(arg.idx) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; + inline bool operator==(const NetPortKey &other) const { return (net == other.net) && (idx == other.idx); } +}; + +struct ClockDomainKey +{ + IdString clock; + ClockEdge edge; + ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge){}; + // probably also need something here to deal with constraints + inline bool is_async() const { return clock == IdString(); } + + struct Hash + { + std::size_t operator()(const ClockDomainKey &arg) const noexcept + { + std::size_t seed = std::hash<IdString>()(arg.clock); + seed ^= std::hash<int>()(int(arg.edge)) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; + inline bool operator==(const 
ClockDomainKey &other) const { return (clock == other.clock) && (edge == other.edge); } +}; + +typedef int domain_id_t; + +struct ClockDomainPairKey +{ + domain_id_t launch, capture; + ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture){}; + inline bool operator==(const ClockDomainPairKey &other) const + { + return (launch == other.launch) && (capture == other.capture); + } + struct Hash + { + std::size_t operator()(const ClockDomainPairKey &arg) const noexcept + { + std::size_t seed = std::hash<domain_id_t>()(arg.launch); + seed ^= std::hash<domain_id_t>()(arg.capture) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; +}; + +struct TimingAnalyser +{ + public: + TimingAnalyser(Context *ctx) : ctx(ctx){}; + void setup(); + void run(); + void print_report(); + + float get_criticality(CellPortKey port) const { return ports.at(port).worst_crit; } + float get_setup_slack(CellPortKey port) const { return ports.at(port).worst_setup_slack; } + float get_domain_setup_slack(CellPortKey port) const + { + delay_t slack = std::numeric_limits<delay_t>::max(); + for (const auto &dp : ports.at(port).domain_pairs) + slack = std::min(slack, domain_pairs.at(dp.first).worst_setup_slack); + return slack; + } + + bool setup_only = false; + bool verbose_mode = false; + + private: + void init_ports(); + void get_cell_delays(); + void get_route_delays(); + void topo_sort(); + void setup_port_domains(); + + void reset_times(); + + void walk_forward(); + void walk_backward(); + + void compute_slack(); + void compute_criticality(); + + void print_fmax(); + // get the N most failing endpoints for a given domain pair + std::vector<CellPortKey> get_failing_eps(domain_id_t domain_pair, int count); + // print the critical path for an endpoint and domain pair + void print_critical_path(CellPortKey endpoint, domain_id_t domain_pair); + + const DelayPair init_delay{std::numeric_limits<delay_t>::max(), std::numeric_limits<delay_t>::lowest()}; 
+ + // Set arrival/required times if more/less than the current value + void set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length, + CellPortKey prev = CellPortKey()); + void set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length, + CellPortKey prev = CellPortKey()); + + // To avoid storing the domain tag structure (which could get large when considering more complex constrained tag + // cases), assign each domain an ID and use that instead + // An arrival or required time entry. Stores both the min/max delays; and the traversal to reach them for critical + // path reporting + struct ArrivReqTime + { + DelayPair value; + CellPortKey bwd_min, bwd_max; + int path_length; + }; + // Data per port-domain tuple + struct PortDomainPairData + { + delay_t setup_slack = std::numeric_limits<delay_t>::max(), hold_slack = std::numeric_limits<delay_t>::max(); + delay_t budget = std::numeric_limits<delay_t>::max(); + int max_path_length = 0; + float criticality = 0; + }; + + // A cell timing arc, used to cache cell timings and reduce the number of potentially-expensive Arch API calls + struct CellArc + { + + enum ArcType + { + COMBINATIONAL, + SETUP, + HOLD, + CLK_TO_Q + } type; + + IdString other_port; + DelayQuad value; + // Clock polarity, not used for combinational arcs + ClockEdge edge; + + CellArc(ArcType type, IdString other_port, DelayQuad value) + : type(type), other_port(other_port), value(value), edge(RISING_EDGE){}; + CellArc(ArcType type, IdString other_port, DelayQuad value, ClockEdge edge) + : type(type), other_port(other_port), value(value), edge(edge){}; + }; + + // Timing data for every cell port + struct PerPort + { + CellPortKey cell_port; + NetPortKey net_port; + PortType type; + // per domain timings + std::unordered_map<domain_id_t, ArrivReqTime> arrival; + std::unordered_map<domain_id_t, ArrivReqTime> required; + std::unordered_map<domain_id_t, PortDomainPairData> domain_pairs; + 
// cell timing arcs to (outputs)/from (inputs) from this port + std::vector<CellArc> cell_arcs; + // routing delay into this port (input ports only) + DelayPair route_delay; + // worst criticality and slack across domain pairs + float worst_crit; + delay_t worst_setup_slack, worst_hold_slack; + }; + + struct PerDomain + { + PerDomain(ClockDomainKey key) : key(key){}; + ClockDomainKey key; + // these are pairs (signal port; clock port) + std::vector<std::pair<CellPortKey, IdString>> startpoints, endpoints; + }; + + struct PerDomainPair + { + PerDomainPair(ClockDomainPairKey key) : key(key){}; + ClockDomainPairKey key; + DelayPair period; + delay_t worst_setup_slack, worst_hold_slack; + }; + + CellInfo *cell_info(const CellPortKey &key); + PortInfo &port_info(const CellPortKey &key); + + domain_id_t domain_id(IdString cell, IdString clock_port, ClockEdge edge); + domain_id_t domain_id(const NetInfo *net, ClockEdge edge); + domain_id_t domain_pair_id(domain_id_t launch, domain_id_t capture); + + void copy_domains(const CellPortKey &from, const CellPortKey &to, bool backwards); + + std::unordered_map<CellPortKey, PerPort, CellPortKey::Hash> ports; + std::unordered_map<ClockDomainKey, domain_id_t, ClockDomainKey::Hash> domain_to_id; + std::unordered_map<ClockDomainPairKey, domain_id_t, ClockDomainPairKey::Hash> pair_to_id; + std::vector<PerDomain> domains; + std::vector<PerDomainPair> domain_pairs; + + std::vector<CellPortKey> topological_order; + + Context *ctx; +}; + // Evenly redistribute the total path slack amongst all sinks on each path void assign_budget(Context *ctx, bool quiet = false); @@ -32,19 +277,6 @@ void assign_budget(Context *ctx, bool quiet = false); void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false, bool warn_on_failure = false); -// Data for the timing optimisation algorithm -struct NetCriticalityInfo -{ - // One each per user - std::vector<delay_t> slack; - std::vector<float> criticality; 
- unsigned max_path_length = 0; - delay_t cd_worst_slack = std::numeric_limits<delay_t>::max(); -}; - -typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap; -void get_criticalities(Context *ctx, NetCriticalityMap *net_crit); - NEXTPNR_NAMESPACE_END #endif diff --git a/common/timing_opt.cc b/common/timing_opt.cc index 28b7f2cf..51c27cc6 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -79,16 +79,17 @@ NEXTPNR_NAMESPACE_BEGIN class TimingOptimiser { public: - TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){}; + TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg), tmg(ctx){}; bool optimise() { log_info("Running timing-driven placement optimisation...\n"); ctx->lock(); if (ctx->verbose) timing_analysis(ctx, false, true, false, false); + tmg.setup(); for (int i = 0; i < 30; i++) { log_info(" Iteration %d...\n", i); - get_criticalities(ctx, &net_crit); + tmg.run(); setup_delay_limits(); auto crit_paths = find_crit_paths(0.98, 50000); for (auto &path : crit_paths) @@ -109,18 +110,14 @@ class TimingOptimiser for (auto usr : ni->users) { max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max(); } - if (!net_crit.count(net.first)) - continue; - auto &nc = net_crit.at(net.first); - if (nc.slack.empty()) - continue; for (size_t i = 0; i < ni->users.size(); i++) { auto &usr = ni->users.at(i); delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); - if (nc.max_path_length != 0) { - max_net_delay[std::make_pair(usr.cell->name, usr.port)] = - net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10); - } + delay_t slack = tmg.get_setup_slack(CellPortKey(usr)); + delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr)); + if (slack == std::numeric_limits<delay_t>::max()) + continue; + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = net_delay + ((slack - domain_slack) / 10); } } } @@ -283,12 +280,18 @@ class TimingOptimiser for (auto net : netnames) { 
if (crit_nets.size() >= max_count) break; - if (!net_crit.count(net)) - continue; - auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end()); - if (*crit_user > crit_thresh) - crit_nets.push_back( - std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); + float highest_crit = 0; + size_t crit_user_idx = 0; + NetInfo *ni = ctx->nets.at(net).get(); + for (size_t i = 0; i < ni->users.size(); i++) { + float crit = tmg.get_criticality(CellPortKey(ni->users.at(i))); + if (crit > highest_crit) { + highest_crit = crit; + crit_user_idx = i; + } + } + if (highest_crit > crit_thresh) + crit_nets.push_back(std::make_pair(ni, crit_user_idx)); } auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { @@ -325,8 +328,6 @@ class TimingOptimiser NetInfo *pn = port.second.net; if (pn == nullptr) continue; - if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) - continue; int ccount; DelayQuad combDelay; TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); @@ -336,7 +337,7 @@ class TimingOptimiser if (!is_path) continue; size_t user_idx = port_user_index(cell, port.second); - float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); + float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first)); if (used_ports.count(&(pn->users.at(user_idx)))) continue; if (usr_crit >= max_crit) { @@ -364,8 +365,7 @@ class TimingOptimiser NetInfo *pn = port.second.net; if (pn == nullptr) continue; - if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) - continue; + int ccount; DelayQuad combDelay; TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); @@ -374,12 +374,12 @@ class TimingOptimiser bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); if (!is_path) continue; - auto &crits = net_crit.at(pn->name).criticality; - for (size_t i = 0; i < crits.size(); i++) { + for 
(size_t i = 0; i < pn->users.size(); i++) { if (used_ports.count(&(pn->users.at(i)))) continue; - if (crits.at(i) >= max_crit) { - max_crit = crits.at(i); + float crit = tmg.get_criticality(CellPortKey(pn->users.at(i))); + if (crit >= max_crit) { + max_crit = crit; crit_sink = std::make_pair(pn, i); } } @@ -420,12 +420,7 @@ class TimingOptimiser for (auto port : path) { if (ctx->debug) { - float crit = 0; - NetInfo *pn = port->cell->ports.at(port->port).net; - if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty()) - for (size_t i = 0; i < pn->users.size(); i++) - if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port) - crit = net_crit.at(pn->name).criticality.at(i); + float crit = tmg.get_criticality(CellPortKey(*port)); log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), ctx->nameOfBel(port->cell->bel), crit); } @@ -613,10 +608,9 @@ class TimingOptimiser std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells; // Map cell ports to net delay limit std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay; - // Criticality data from timing analysis - NetCriticalityMap net_crit; Context *ctx; TimingOptCfg cfg; + TimingAnalyser tmg; }; bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } diff --git a/common/util.h b/common/util.h index 55718344..540646c7 100644 --- a/common/util.h +++ b/common/util.h @@ -181,6 +181,98 @@ template <typename ForwardRange> inline auto get_only_value(ForwardRange r) return get_only_value(b, e); } +// From Yosys +// https://github.com/YosysHQ/yosys/blob/0fb4224ebca86156a1296b9210116d9a9cbebeed/kernel/utils.h#L131 +template <typename T, typename C = std::less<T>> struct TopoSort +{ + bool analyze_loops, found_loops; + std::map<T, std::set<T, C>, C> database; + std::set<std::set<T, C>> loops; + std::vector<T> sorted; + + TopoSort() + { + analyze_loops = true; + found_loops = false; 
+ } + + void node(T n) + { + if (database.count(n) == 0) + database[n] = std::set<T, C>(); + } + + void edge(T left, T right) + { + node(left); + database[right].insert(left); + } + + void sort_worker(const T &n, std::set<T, C> &marked_cells, std::set<T, C> &active_cells, + std::vector<T> &active_stack) + { + if (active_cells.count(n)) { + found_loops = true; + if (analyze_loops) { + std::set<T, C> loop; + for (int i = int(active_stack.size()) - 1; i >= 0; i--) { + loop.insert(active_stack[i]); + if (active_stack[i] == n) + break; + } + loops.insert(loop); + } + return; + } + + if (marked_cells.count(n)) + return; + + if (!database.at(n).empty()) { + if (analyze_loops) + active_stack.push_back(n); + active_cells.insert(n); + + for (auto &left_n : database.at(n)) + sort_worker(left_n, marked_cells, active_cells, active_stack); + + if (analyze_loops) + active_stack.pop_back(); + active_cells.erase(n); + } + + marked_cells.insert(n); + sorted.push_back(n); + } + + bool sort() + { + loops.clear(); + sorted.clear(); + found_loops = false; + + std::set<T, C> marked_cells; + std::set<T, C> active_cells; + std::vector<T> active_stack; + + for (auto &it : database) + sort_worker(it.first, marked_cells, active_cells, active_stack); + + NPNR_ASSERT(sorted.size() == database.size()); + return !found_loops; + } +}; + +template <typename T> struct reversed_range_t +{ + T &obj; + explicit reversed_range_t(T &obj) : obj(obj){}; + auto begin() { return obj.rbegin(); } + auto end() { return obj.rend(); } +}; + +template <typename T> reversed_range_t<T> reversed_range(T &obj) { return reversed_range_t<T>(obj); } + NEXTPNR_NAMESPACE_END #endif diff --git a/ecp5/arch_place.cc b/ecp5/arch_place.cc index 5565a01c..57c3b181 100644 --- a/ecp5/arch_place.cc +++ b/ecp5/arch_place.cc @@ -95,8 +95,8 @@ bool Arch::isBelLocationValid(BelId bel) const void Arch::permute_luts() { - NetCriticalityMap nc; - get_criticalities(getCtx(), &nc); + TimingAnalyser tmg(getCtx()); + tmg.setup(); 
std::unordered_map<PortInfo *, size_t> port_to_user; for (auto net : sorted(nets)) { @@ -121,13 +121,7 @@ void Arch::permute_luts() ci->ports[port_names.at(i)].type = PORT_IN; } auto &port = ci->ports.at(port_names.at(i)); - float crit = 0; - if (port.net != nullptr && nc.count(port.net->name)) { - auto &n = nc.at(port.net->name); - size_t usr = port_to_user.at(&port); - if (usr < n.criticality.size()) - crit = n.criticality.at(usr); - } + float crit = (port.net == nullptr) ? 0 : tmg.get_criticality(CellPortKey(ci->name, port_names.at(i))); orig_nets.push_back(port.net); inputs.emplace_back(crit, i); } diff --git a/nexus/post_place.cc b/nexus/post_place.cc index 65676188..b712aea3 100644 --- a/nexus/post_place.cc +++ b/nexus/post_place.cc @@ -28,9 +28,9 @@ NEXTPNR_NAMESPACE_BEGIN struct NexusPostPlaceOpt { Context *ctx; - NetCriticalityMap net_crit; + TimingAnalyser tmg; - NexusPostPlaceOpt(Context *ctx) : ctx(ctx){}; + NexusPostPlaceOpt(Context *ctx) : ctx(ctx), tmg(ctx){}; inline bool is_constrained(CellInfo *cell) { @@ -139,7 +139,7 @@ struct NexusPostPlaceOpt void operator()() { - get_criticalities(ctx, &net_crit); + tmg.setup(); opt_lutffs(); } |