From f207068ee2ee31d6bf3642cb6fc6bfdeffb2897f Mon Sep 17 00:00:00 2001
From: gatecat <gatecat@ds0.me>
Date: Sun, 15 Aug 2021 16:18:57 +0100
Subject: router2: Add experimental timing-driven ripup option

Signed-off-by: gatecat <gatecat@ds0.me>
---
 common/command.cc |  4 ++++
 common/router2.cc | 56 +++++++++++++++++++++++++++++++++++++++++++------------
 common/timing.cc  | 21 +++++++++++++++++++--
 3 files changed, 67 insertions(+), 14 deletions(-)
diff --git a/common/command.cc b/common/command.cc
index c175cd27..34bb93cb 100644
--- a/common/command.cc
+++ b/common/command.cc
@@ -173,6 +173,8 @@ po::options_description CommandHandler::getGeneralOptions()
     general.add_options()("router2-heatmap", po::value<std::string>(),
                           "prefix for router2 resource congestion heatmaps");
 
+    general.add_options()("router2-tmg-ripup", "enable experimental timing-driven ripup in router2");
+
     general.add_options()("report", po::value<std::string>(),
                           "write timing and utilization report in JSON format to file");
 
@@ -286,6 +288,8 @@ void CommandHandler::setupContext(Context *ctx)
         ctx->settings[ctx->id("placerHeap/timingWeight")] = std::to_string(vm["placer-heap-timingweight"].as<int>());
     if (vm.count("router2-heatmap"))
         ctx->settings[ctx->id("router2/heatmap")] = vm["router2-heatmap"].as<std::string>();
+    if (vm.count("router2-tmg-ripup"))
+        ctx->settings[ctx->id("router2/tmg_ripup")] = true;
 
     // Setting default values
     if (ctx->settings.find(ctx->id("target_freq")) == ctx->settings.end())
diff --git a/common/router2.cc b/common/router2.cc
index 0e3a6ebb..878b615e 100644
--- a/common/router2.cc
+++ b/common/router2.cc
@@ -104,7 +104,7 @@ struct Router2
     std::vector<NetInfo *> nets_by_udata;
     std::vector<PerNetData> nets;
 
-    bool timing_driven;
+    bool timing_driven, timing_driven_ripup;
     TimingAnalyser tmg;
 
     void setup_nets()
@@ -358,7 +358,7 @@ struct Router2
         float base_cost = ctx->getDelayNS(ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(wire).maxDelay() +
                                           ctx->getDelayEpsilon());
         int overuse = wd.curr_cong;
-        float hist_cost = wd.hist_cong_cost;
+        float hist_cost = 1.0f + crit_weight * (wd.hist_cong_cost - 1.0f);
         float bias_cost = 0;
         int source_uses = 0;
         if (nd.wires.count(wire)) {
@@ -574,6 +574,12 @@ struct Router2
         return tmg.get_criticality(CellPortKey(net->users.at(i)));
     }
 
+    bool arc_failed_slack(NetInfo *net, size_t usr_idx)
+    {
+        return timing_driven_ripup &&
+               (tmg.get_setup_slack(CellPortKey(net->users.at(usr_idx))) < (2 * ctx->getDelayEpsilon()));
+    }
+
     ArcRouteResult route_arc(ThreadContext &t, NetInfo *net, size_t i, size_t phys_pin, bool is_mt, bool is_bb = true)
     {
         // Do some initial lookups and checks
@@ -595,6 +601,7 @@ struct Router2
         // Calculate a timing weight based on criticality
         float crit = get_arc_crit(net, i);
         float crit_weight = (1.0f - std::pow(crit, 2));
+        ROUTE_LOG_DBG("     crit=%.3f crit_weight=%.3f\n", crit, crit_weight);
         // Check if arc was already done _in this iteration_
         if (t.processed_sinks.count(dst_wire))
             return ARC_SUCCESS;
@@ -603,7 +610,7 @@ struct Router2
         //     0. starting within a small range of existing routing
         //     1. expanding from all routing
         int mode = 0;
-        if (net->users.size() < 4 || nd.wires.empty())
+        if (net->users.size() < 4 || nd.wires.empty() || (crit > 0.95))
             mode = 1;
 
         // This records the point where forwards and backwards routing met
@@ -866,12 +873,15 @@ struct Router2
         t.wire_by_loc.clear();
         t.in_wire_by_loc.clear();
         auto &nd = nets.at(net->udata);
+        bool failed_slack = false;
+        for (size_t i = 0; i < net->users.size(); i++)
+            failed_slack |= arc_failed_slack(net, i);
         for (size_t i = 0; i < net->users.size(); i++) {
             auto &ad = nd.arcs.at(i);
             for (size_t j = 0; j < ad.size(); j++) {
                 // Ripup failed arcs to start with
                 // Check if arc is already legally routed
-                if (check_arc_routing(net, i, j)) {
+                if (!failed_slack && check_arc_routing(net, i, j)) {
                     update_wire_by_loc(t, net, i, j, true);
                     continue;
                 }
@@ -1357,14 +1367,17 @@ struct Router2
             route_queue.push_back(i);
 
         timing_driven = ctx->setting<bool>("timing_driven");
+        if (ctx->settings.count(ctx->id("router2/tmg_ripup")))
+            timing_driven_ripup = timing_driven && ctx->setting<bool>("router2/tmg_ripup");
+        else
+            timing_driven_ripup = false;
         log_info("Running main router loop...\n");
+        if (timing_driven)
+            tmg.run(true);
         do {
             ctx->sorted_shuffle(route_queue);
 
-            if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 500))) {
-                // Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few
-                // congested nodes
-                tmg.run(iter == 1);
+            if (timing_driven) {
                 for (auto n : route_queue) {
                     NetInfo *ni = nets_by_udata.at(n);
                     auto &net = nets.at(n);
@@ -1391,15 +1404,34 @@ struct Router2
                 write_wiretype_heatmap(cong_map);
                 log_info("        wrote wiretype heatmap to %s.\n", filename.c_str());
             }
-
-            if (overused_wires == 0) {
+            int tmgfail = 0;
+            if (timing_driven)
+                tmg.run(false);
+            if (timing_driven_ripup && iter < 500) {
+                for (size_t i = 0; i < nets_by_udata.size(); i++) {
+                    NetInfo *ni = nets_by_udata.at(i);
+                    for (size_t j = 0; j < ni->users.size(); j++) {
+                        if (arc_failed_slack(ni, j)) {
+                            failed_nets.insert(i);
+                            ++tmgfail;
+                        }
+                    }
+                }
+            }
+            if (overused_wires == 0 && tmgfail == 0) {
                 // Try and actually bind nextpnr Arch API wires
                 bind_and_check_all();
             }
             for (auto cn : failed_nets)
                 route_queue.push_back(cn);
-            log_info("    iter=%d wires=%d overused=%d overuse=%d archfail=%s\n", iter, total_wire_use, overused_wires,
-                     total_overuse, overused_wires > 0 ? "NA" : std::to_string(arch_fail).c_str());
+            if (timing_driven_ripup)
+                log_info("    iter=%d wires=%d overused=%d overuse=%d tmgfail=%d archfail=%s\n", iter, total_wire_use,
+                         overused_wires, total_overuse, tmgfail,
+                         (overused_wires > 0 || tmgfail > 0) ? "NA" : std::to_string(arch_fail).c_str());
+            else
+                log_info("    iter=%d wires=%d overused=%d overuse=%d archfail=%s\n", iter, total_wire_use,
+                         overused_wires, total_overuse,
+                         (overused_wires > 0 || tmgfail > 0) ? "NA" : std::to_string(arch_fail).c_str());
             ++iter;
             if (curr_cong_weight < 1e9)
                 curr_cong_weight += cfg.curr_cong_mult;
diff --git a/common/timing.cc b/common/timing.cc
index 0cdb5be2..161769dc 100644
--- a/common/timing.cc
+++ b/common/timing.cc
@@ -269,6 +269,22 @@ void TimingAnalyser::setup_port_domains()
         // If there are loops, repeat the process until a fixed point is reached, as there might be unusual ways to
         // visit points, which would result in a missing domain key and therefore crash later on
     } while (have_loops && updated_domains);
+    for (auto &dp : domain_pairs) {
+        auto &launch_data = domains.at(dp.key.launch);
+        auto &capture_data = domains.at(dp.key.capture);
+        if (launch_data.key.clock != capture_data.key.clock)
+            continue;
+        IdString clk = launch_data.key.clock;
+        if (!ctx->nets.count(clk))
+            continue;
+        NetInfo *clk_net = ctx->nets.at(clk).get();
+        if (!clk_net->clkconstr)
+            continue;
+        delay_t period = clk_net->clkconstr->period.minDelay();
+        if (launch_data.key.edge != capture_data.key.edge)
+            period /= 2;
+        dp.period = DelayPair(period);
+    }
 }
 
 void TimingAnalyser::reset_times()
@@ -457,11 +473,12 @@ void TimingAnalyser::compute_slack()
             auto &dp = domain_pairs.at(pdp.first);
             auto &arr = pd.arrival.at(dp.key.launch);
             auto &req = pd.required.at(dp.key.capture);
-            pdp.second.setup_slack = dp.period.minDelay() - (arr.value.maxDelay() - req.value.minDelay());
+            pdp.second.setup_slack = 0 - (arr.value.maxDelay() - req.value.minDelay());
             if (!setup_only)
                 pdp.second.hold_slack = arr.value.minDelay() - req.value.maxDelay();
             pdp.second.max_path_length = arr.path_length + req.path_length;
-            pd.worst_setup_slack = std::min(pd.worst_setup_slack, pdp.second.setup_slack);
+            if (dp.key.launch == dp.key.capture)
+                pd.worst_setup_slack = std::min(pd.worst_setup_slack, dp.period.minDelay() + pdp.second.setup_slack);
             dp.worst_setup_slack = std::min(dp.worst_setup_slack, pdp.second.setup_slack);
             if (!setup_only) {
                 pd.worst_hold_slack = std::min(pd.worst_hold_slack, pdp.second.hold_slack);
-- 
cgit v1.2.3