From f207068ee2ee31d6bf3642cb6fc6bfdeffb2897f Mon Sep 17 00:00:00 2001 From: gatecat Date: Sun, 15 Aug 2021 16:18:57 +0100 Subject: router2: Add experimental timing-driven ripup option Signed-off-by: gatecat --- common/command.cc | 4 ++++ common/router2.cc | 56 +++++++++++++++++++++++++++++++++++++++++++------------ common/timing.cc | 21 +++++++++++++++++++-- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/common/command.cc b/common/command.cc index c175cd27..34bb93cb 100644 --- a/common/command.cc +++ b/common/command.cc @@ -173,6 +173,8 @@ po::options_description CommandHandler::getGeneralOptions() general.add_options()("router2-heatmap", po::value(), "prefix for router2 resource congestion heatmaps"); + general.add_options()("router2-tmg-ripup", "enable experimental timing-driven ripup in router2"); + general.add_options()("report", po::value(), "write timing and utilization report in JSON format to file"); @@ -286,6 +288,8 @@ void CommandHandler::setupContext(Context *ctx) ctx->settings[ctx->id("placerHeap/timingWeight")] = std::to_string(vm["placer-heap-timingweight"].as()); if (vm.count("router2-heatmap")) ctx->settings[ctx->id("router2/heatmap")] = vm["router2-heatmap"].as(); + if (vm.count("router2-tmg-ripup")) + ctx->settings[ctx->id("router2/tmg_ripup")] = true; // Setting default values if (ctx->settings.find(ctx->id("target_freq")) == ctx->settings.end()) diff --git a/common/router2.cc b/common/router2.cc index 0e3a6ebb..878b615e 100644 --- a/common/router2.cc +++ b/common/router2.cc @@ -104,7 +104,7 @@ struct Router2 std::vector nets_by_udata; std::vector nets; - bool timing_driven; + bool timing_driven, timing_driven_ripup; TimingAnalyser tmg; void setup_nets() @@ -358,7 +358,7 @@ struct Router2 float base_cost = ctx->getDelayNS(ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(wire).maxDelay() + ctx->getDelayEpsilon()); int overuse = wd.curr_cong; - float hist_cost = wd.hist_cong_cost; + float hist_cost = 1.0f + crit_weight * (wd.hist_cong_cost - 1.0f); float bias_cost = 0; int source_uses = 0; if (nd.wires.count(wire)) { @@ -574,6 +574,12 @@ struct Router2 return tmg.get_criticality(CellPortKey(net->users.at(i))); } + bool arc_failed_slack(NetInfo *net, size_t usr_idx) + { + return timing_driven_ripup && + (tmg.get_setup_slack(CellPortKey(net->users.at(usr_idx))) < (2 * ctx->getDelayEpsilon())); + } + ArcRouteResult route_arc(ThreadContext &t, NetInfo *net, size_t i, size_t phys_pin, bool is_mt, bool is_bb = true) { // Do some initial lookups and checks @@ -595,6 +601,7 @@ struct Router2 // Calculate a timing weight based on criticality float crit = get_arc_crit(net, i); float crit_weight = (1.0f - std::pow(crit, 2)); + ROUTE_LOG_DBG(" crit=%.3f crit_weight=%.3f\n", crit, crit_weight); // Check if arc was already done _in this iteration_ if (t.processed_sinks.count(dst_wire)) return ARC_SUCCESS; @@ -603,7 +610,7 @@ struct Router2 // 0. starting within a small range of existing routing // 1. expanding from all routing int mode = 0; - if (net->users.size() < 4 || nd.wires.empty()) + if (net->users.size() < 4 || nd.wires.empty() || (crit > 0.95)) mode = 1; // This records the point where forwards and backwards routing met @@ -866,12 +873,15 @@ struct Router2 t.wire_by_loc.clear(); t.in_wire_by_loc.clear(); auto &nd = nets.at(net->udata); + bool failed_slack = false; + for (size_t i = 0; i < net->users.size(); i++) + failed_slack |= arc_failed_slack(net, i); for (size_t i = 0; i < net->users.size(); i++) { auto &ad = nd.arcs.at(i); for (size_t j = 0; j < ad.size(); j++) { // Ripup failed arcs to start with // Check if arc is already legally routed - if (check_arc_routing(net, i, j)) { + if (!failed_slack && check_arc_routing(net, i, j)) { update_wire_by_loc(t, net, i, j, true); continue; } @@ -1357,14 +1367,17 @@ struct Router2 route_queue.push_back(i); timing_driven = ctx->setting("timing_driven"); + if (ctx->settings.count(ctx->id("router2/tmg_ripup"))) + timing_driven_ripup = timing_driven && ctx->setting("router2/tmg_ripup"); + else + timing_driven_ripup = false; log_info("Running main router loop...\n"); + if (timing_driven) + tmg.run(true); do { ctx->sorted_shuffle(route_queue); - if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 500))) { - // Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few - // congested nodes - tmg.run(iter == 1); + if (timing_driven) { for (auto n : route_queue) { NetInfo *ni = nets_by_udata.at(n); auto &net = nets.at(n); @@ -1391,15 +1404,34 @@ struct Router2 write_wiretype_heatmap(cong_map); log_info(" wrote wiretype heatmap to %s.\n", filename.c_str()); } - - if (overused_wires == 0) { + int tmgfail = 0; + if (timing_driven) + tmg.run(false); + if (timing_driven_ripup && iter < 500) { + for (size_t i = 0; i < nets_by_udata.size(); i++) { + NetInfo *ni = nets_by_udata.at(i); + for (size_t j = 0; j < ni->users.size(); j++) { + if (arc_failed_slack(ni, j)) { + failed_nets.insert(i); + ++tmgfail; + } + } + } + } + if (overused_wires == 0 && tmgfail == 0) { // Try and actually bind nextpnr Arch API wires bind_and_check_all(); } for (auto cn : failed_nets) route_queue.push_back(cn); - log_info(" iter=%d wires=%d overused=%d overuse=%d archfail=%s\n", iter, total_wire_use, overused_wires, - total_overuse, overused_wires > 0 ? "NA" : std::to_string(arch_fail).c_str()); + if (timing_driven_ripup) + log_info(" iter=%d wires=%d overused=%d overuse=%d tmgfail=%d archfail=%s\n", iter, total_wire_use, + overused_wires, total_overuse, tmgfail, + (overused_wires > 0 || tmgfail > 0) ? "NA" : std::to_string(arch_fail).c_str()); + else + log_info(" iter=%d wires=%d overused=%d overuse=%d archfail=%s\n", iter, total_wire_use, + overused_wires, total_overuse, + (overused_wires > 0 || tmgfail > 0) ? "NA" : std::to_string(arch_fail).c_str()); ++iter; if (curr_cong_weight < 1e9) curr_cong_weight += cfg.curr_cong_mult; diff --git a/common/timing.cc b/common/timing.cc index 0cdb5be2..161769dc 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -269,6 +269,22 @@ void TimingAnalyser::setup_port_domains() // If there are loops, repeat the process until a fixed point is reached, as there might be unusual ways to // visit points, which would result in a missing domain key and therefore crash later on } while (have_loops && updated_domains); + for (auto &dp : domain_pairs) { + auto &launch_data = domains.at(dp.key.launch); + auto &capture_data = domains.at(dp.key.capture); + if (launch_data.key.clock != capture_data.key.clock) + continue; + IdString clk = launch_data.key.clock; + if (!ctx->nets.count(clk)) + continue; + NetInfo *clk_net = ctx->nets.at(clk).get(); + if (!clk_net->clkconstr) + continue; + delay_t period = clk_net->clkconstr->period.minDelay(); + if (launch_data.key.edge != capture_data.key.edge) + period /= 2; + dp.period = DelayPair(period); + } } void TimingAnalyser::reset_times() @@ -457,11 +473,12 @@ void TimingAnalyser::compute_slack() auto &dp = domain_pairs.at(pdp.first); auto &arr = pd.arrival.at(dp.key.launch); auto &req = pd.required.at(dp.key.capture); - pdp.second.setup_slack = dp.period.minDelay() - (arr.value.maxDelay() - req.value.minDelay()); + pdp.second.setup_slack = 0 - (arr.value.maxDelay() - req.value.minDelay()); if (!setup_only) pdp.second.hold_slack = arr.value.minDelay() - req.value.maxDelay(); pdp.second.max_path_length = arr.path_length + req.path_length; - pd.worst_setup_slack = std::min(pd.worst_setup_slack, pdp.second.setup_slack); + if (dp.key.launch == dp.key.capture) + pd.worst_setup_slack = std::min(pd.worst_setup_slack, dp.period.minDelay() + pdp.second.setup_slack); dp.worst_setup_slack = std::min(dp.worst_setup_slack, pdp.second.setup_slack); if (!setup_only) { pd.worst_hold_slack = std::min(pd.worst_hold_slack, pdp.second.hold_slack); -- cgit v1.2.3