Merge pull request #163 from daveshah1/timing_opt

Adding criticality calculation and experimental timing optimisation pass
author: David Shah <davey1576@gmail.com> 2018-12-07 21:19:41 +0000
committer: GitHub <noreply@github.com> 2018-12-07 21:19:41 +0000
commit: d790d0bb91261939a784fe0f86ce6fa2f6245e55 (patch)
tree: 5634ef0c91bda07c15a25e42f06194e9ecbfbb51
parent: a9e1fab9fcdb0493343a8ba23b471255544f10ea (diff)
parent: b732e42fa312b83bee6c122d69e0a171afca779c (diff)
download: nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.tar.gz
nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.tar.bz2
nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.zip
7 files changed, 878 insertions, 4 deletions
diff --git a/common/timing.cc b/common/timing.cc
index 88ab14c2..b15327fb 100644
--- a/common/timing.cc
+++ b/common/timing.cc
@@ -86,6 +86,7 @@ struct CriticalPath
 };
 
 typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap;
+typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
 
 struct Timing
 {
@@ -95,6 +96,7 @@ struct Timing
     delay_t min_slack;
     CriticalPathMap *crit_path;
     DelayFrequency *slack_histogram;
+    NetCriticalityMap *net_crit;
     IdString async_clock;
 
     struct TimingData
@@ -105,13 +107,15 @@ struct Timing
         unsigned max_path_length = 0;
         delay_t min_remaining_budget;
         bool false_startpoint = false;
+        std::vector<delay_t> min_required;
         std::unordered_map<ClockEvent, delay_t> arrival_time;
     };
 
     Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr,
-           DelayFrequency *slack_histogram = nullptr)
+           DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr)
             : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->target_freq),
-              crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$"))
+              crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit),
+              async_clock(ctx->id("$async$"))
     {
     }
 
@@ -454,6 +458,180 @@ struct Timing
                 std::reverse(cp_ports.begin(), cp_ports.end());
             }
         }
+
+        if (net_crit) {
+            NPNR_ASSERT(crit_path);
+            // Go through in reverse topographical order to set required times
+            for (auto net : boost::adaptors::reverse(topographical_order)) {
+                if (!net_data.count(net))
+                    continue;
+                auto &nd_map = net_data.at(net);
+                for (auto &startdomain : nd_map) {
+                    auto &nd = startdomain.second;
+                    if (nd.false_startpoint)
+                        continue;
+                    if (startdomain.first.clock == async_clock)
+                        continue;
+                    if (nd.min_required.empty())
+                        nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max());
+                    delay_t net_min_required = std::numeric_limits<delay_t>::max();
+                    for (size_t i = 0; i < net->users.size(); i++) {
+                        auto &usr = net->users.at(i);
+                        auto net_delay = ctx->getNetinfoRouteDelay(net, usr);
+                        int port_clocks;
+                        TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
+                        if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
+                            auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) {
+                                delay_t period;
+                                // Set default period
+                                if (edge == startdomain.first.edge) {
+                                    period = clk_period;
+                                } else {
+                                    period = clk_period / 2;
+                                }
+                                if (clksig != async_clock) {
+                                    if (ctx->nets.at(clksig)->clkconstr) {
+                                        if (edge == startdomain.first.edge) {
+                                            // same edge
+                                            period = ctx->nets.at(clksig)->clkconstr->period.minDelay();
+                                        } else if (edge == RISING_EDGE) {
+                                            // falling -> rising
+                                            period = ctx->nets.at(clksig)->clkconstr->low.minDelay();
+                                        } else if (edge == FALLING_EDGE) {
+                                            // rising -> falling
+                                            period = ctx->nets.at(clksig)->clkconstr->high.minDelay();
+                                        }
+                                    }
+                                }
+                                nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i));
+                            };
+                            if (portClass == TMG_REGISTER_INPUT) {
+                                for (int j = 0; j < port_clocks; j++) {
+                                    TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j);
+                                    const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port);
+                                    IdString clksig = clknet ? clknet->name : async_clock;
+                                    process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE,
+                                                     clkInfo.setup.maxDelay());
+                                }
+                            } else {
+                                process_endpoint(async_clock, RISING_EDGE, 0);
+                            }
+                        }
+                        net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay);
+                    }
+                    PortRef &drv = net->driver;
+                    if (drv.cell == nullptr)
+                        continue;
+                    for (const auto &port : drv.cell->ports) {
+                        if (port.second.type != PORT_IN || !port.second.net)
+                            continue;
+                        DelayInfo comb_delay;
+                        bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay);
+                        if (!is_path)
+                            continue;
+                        int cc;
+                        auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc);
+                        if (pclass != TMG_COMB_INPUT)
+                            continue;
+                        NetInfo *sink_net = port.second.net;
+                        if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) {
+                            auto &sink_nd = net_data.at(sink_net).at(startdomain.first);
+                            if (sink_nd.min_required.empty())
+                                sink_nd.min_required.resize(sink_net->users.size(),
+                                                            std::numeric_limits<delay_t>::max());
+                            for (size_t i = 0; i < sink_net->users.size(); i++) {
+                                auto &user = sink_net->users.at(i);
+                                if (user.cell == drv.cell && user.port == port.first) {
+                                    sink_nd.min_required.at(i) = net_min_required - comb_delay.maxDelay();
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            std::unordered_map<ClockEvent, delay_t> worst_slack;
+
+            // Assign slack values
+            for (auto &net_entry : net_data) {
+                const NetInfo *net = net_entry.first;
+                for (auto &startdomain : net_entry.second) {
+                    auto &nd = startdomain.second;
+                    if (startdomain.first.clock == async_clock)
+                        continue;
+                    if (nd.min_required.empty())
+                        continue;
+                    auto &nc = (*net_crit)[net->name];
+                    if (nc.slack.empty())
+                        nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max());
+#if 0
+                    if (ctx->debug)
+                        log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx));
+#endif
+                    for (size_t i = 0; i < net->users.size(); i++) {
+                        delay_t slack = nd.min_required.at(i) -
+                                        (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i)));
+#if 0
+                        if (ctx->debug)
+                            log_info("    user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n",
+                                    net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx),
+                                    ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival),
+                                    ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack));
+#endif
+                        if (worst_slack.count(startdomain.first))
+                            worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack);
+                        else
+                            worst_slack[startdomain.first] = slack;
+                        nc.slack.at(i) = slack;
+                    }
+                    if (ctx->debug)
+                        log_break();
+                }
+            }
+            // Assign criticality values
+            for (auto &net_entry : net_data) {
+                const NetInfo *net = net_entry.first;
+                for (auto &startdomain : net_entry.second) {
+                    if (startdomain.first.clock == async_clock)
+                        continue;
+                    auto &nd = startdomain.second;
+                    if (nd.min_required.empty())
+                        continue;
+                    auto &nc = (*net_crit)[net->name];
+                    if (nc.slack.empty())
+                        continue;
+                    if (nc.criticality.empty())
+                        nc.criticality.resize(net->users.size(), 0);
+                    // Only consider intra-clock paths for criticality
+                    if (!crit_path->count(ClockPair{startdomain.first, startdomain.first}))
+                        continue;
+                    delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay;
+                    for (size_t i = 0; i < net->users.size(); i++) {
+                        float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax);
+                        nc.criticality.at(i) = criticality;
+                    }
+                    nc.max_path_length = nd.max_path_length;
+                    nc.cd_worst_slack = worst_slack.at(startdomain.first);
+                }
+            }
+#if 0
+            if (ctx->debug) {
+                for (auto &nc : *net_crit) {
+                    NetInfo *net = ctx->nets.at(nc.first).get();
+                    log_info("Net %s maxlen %d worst_slack %.02fns: \n", nc.first.c_str(ctx), nc.second.max_path_length,
+                             ctx->getDelayNS(nc.second.cd_worst_slack));
+                    if (!nc.second.criticality.empty() && !nc.second.slack.empty()) {
+                        for (size_t i = 0; i < net->users.size(); i++) {
+                            log_info("   user %s.%s slack %.02fns crit %.03f\n", net->users.at(i).cell->name.c_str(ctx),
+                                     net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nc.second.slack.at(i)),
+                                     nc.second.criticality.at(i));
+                        }
+                    }
+                    log_break();
+                }
+            }
+#endif
+        }
         return min_slack;
     }
 
@@ -766,4 +944,12 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p
     }
 }
 
+void get_criticalities(Context *ctx, NetCriticalityMap *net_crit)
+{
+    CriticalPathMap crit_paths;
+    net_crit->clear();
+    Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit);
+    timing.walk_paths();
+}
+
 NEXTPNR_NAMESPACE_END
diff --git a/common/timing.h b/common/timing.h
index 42f928dc..f1d18e8a 100644
--- a/common/timing.h
+++ b/common/timing.h
@@ -32,6 +32,19 @@ void assign_budget(Context *ctx, bool quiet = false);
 void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false,
                      bool warn_on_failure = false);
 
+// Data for the timing optimisation algorithm
+struct NetCriticalityInfo
+{
+    // One each per user
+    std::vector<delay_t> slack;
+    std::vector<float> criticality;
+    unsigned max_path_length = 0;
+    delay_t cd_worst_slack = std::numeric_limits<delay_t>::max();
+};
+
+typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
+void get_criticalities(Context *ctx, NetCriticalityMap *net_crit);
+
 NEXTPNR_NAMESPACE_END
 
 #endif
diff --git a/common/timing_opt.cc b/common/timing_opt.cc
new file mode 100644
index 00000000..a451bfa0
--- /dev/null
+++ b/common/timing_opt.cc
@@ -0,0 +1,623 @@
+/*
+ *  nextpnr -- Next Generation Place and Route
+ *
+ *  Copyright (C) 2018  David Shah <david@symbioticeda.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+ * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells
+ * on a critical path
+ *
+ * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs"
+ * https://www.cerc.utexas.edu/utda/publications/C205.pdf
+ *
+ * Modifications made to deal with the smaller Bels that nextpnr uses instead of swapping whole tiles,
+ * and deal with the fact that not every cell on the crit path may be swappable.
+ */
+
+#include "timing_opt.h"
+#include <boost/range/adaptor/reversed.hpp>
+#include <queue>
+#include "nextpnr.h"
+#include "timing.h"
+#include "util.h"
+
+namespace std {
+
+template <> struct hash<std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX IdString>>
+{
+    std::size_t
+    operator()(const std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX IdString> &idp) const
+            noexcept
+    {
+        std::size_t seed = 0;
+        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.first));
+        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.second));
+        return seed;
+    }
+};
+
+template <> struct hash<std::pair<int, NEXTPNR_NAMESPACE_PREFIX BelId>>
+{
+    std::size_t operator()(const std::pair<int, NEXTPNR_NAMESPACE_PREFIX BelId> &idp) const noexcept
+    {
+        std::size_t seed = 0;
+        boost::hash_combine(seed, hash<int>()(idp.first));
+        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX BelId>()(idp.second));
+        return seed;
+    }
+};
+
+template <> struct hash<std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX BelId>>
+{
+    std::size_t
+    operator()(const std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX BelId> &idp) const noexcept
+    {
+        std::size_t seed = 0;
+        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.first));
+        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX BelId>()(idp.second));
+        return seed;
+    }
+};
+} // namespace std
+
+NEXTPNR_NAMESPACE_BEGIN
+
+class TimingOptimiser
+{
+  public:
+    TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){};
+    bool optimise()
+    {
+        log_info("Running timing-driven placement optimisation...\n");
+        if (ctx->verbose)
+            timing_analysis(ctx, false, true, false, false);
+        for (int i = 0; i < 30; i++) {
+            log_info("   Iteration %d...\n", i);
+            get_criticalities(ctx, &net_crit);
+            setup_delay_limits();
+            auto crit_paths = find_crit_paths(0.98, 50000);
+            for (auto &path : crit_paths)
+                optimise_path(path);
+            if (ctx->verbose)
+                timing_analysis(ctx, false, true, false, false);
+        }
+        return true;
+    }
+
+  private:
+    void setup_delay_limits()
+    {
+        max_net_delay.clear();
+        for (auto net : sorted(ctx->nets)) {
+            NetInfo *ni = net.second;
+            for (auto usr : ni->users) {
+                max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max();
+            }
+            if (!net_crit.count(net.first))
+                continue;
+            auto &nc = net_crit.at(net.first);
+            if (nc.slack.empty())
+                continue;
+            for (size_t i = 0; i < ni->users.size(); i++) {
+                auto &usr = ni->users.at(i);
+                delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr);
+                if (nc.max_path_length != 0) {
+                    max_net_delay[std::make_pair(usr.cell->name, usr.port)] =
+                            net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10);
+                }
+            }
+        }
+    }
+
+    bool check_cell_delay_limits(CellInfo *cell)
+    {
+        for (const auto &port : cell->ports) {
+            int nc;
+            if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE)
+                continue;
+            NetInfo *net = port.second.net;
+            if (net == nullptr)
+                continue;
+            if (port.second.type == PORT_IN) {
+                if (net->driver.cell == nullptr || net->driver.cell->bel == BelId())
+                    continue;
+                for (auto user : net->users) {
+                    if (user.cell == cell && user.port == port.first) {
+                        if (ctx->predictDelay(net, user) >
+                            1.1 * max_net_delay.at(std::make_pair(cell->name, port.first)))
+                            return false;
+                    }
+                }
+
+            } else if (port.second.type == PORT_OUT) {
+                for (auto user : net->users) {
+                    // This could get expensive for high-fanout nets??
+                    BelId dstBel = user.cell->bel;
+                    if (dstBel == BelId())
+                        continue;
+                    if (ctx->predictDelay(net, user) >
+                        1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) {
+
+                        return false;
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    BelId cell_swap_bel(CellInfo *cell, BelId newBel)
+    {
+        BelId oldBel = cell->bel;
+        if (oldBel == newBel)
+            return oldBel;
+        CellInfo *other_cell = ctx->getBoundBelCell(newBel);
+        NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK);
+        ctx->unbindBel(oldBel);
+        if (other_cell != nullptr) {
+            ctx->unbindBel(newBel);
+            ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK);
+        }
+        ctx->bindBel(newBel, cell, STRENGTH_WEAK);
+        return oldBel;
+    }
+
+    // Check that a series of moves are both legal and remain within maximum delay bounds
+    // Moves are specified as a vector of pairs <cell, oldBel>
+    bool acceptable_move(std::vector<std::pair<CellInfo *, BelId>> &move, bool check_delays = true)
+    {
+        for (auto &entry : move) {
+            if (!ctx->isBelLocationValid(entry.first->bel))
+                return false;
+            if (!ctx->isBelLocationValid(entry.second))
+                return false;
+            if (!check_delays)
+                continue;
+            if (!check_cell_delay_limits(entry.first))
+                return false;
+            // We might have swapped another cell onto the original bel. Check this for max delay violations
+            // too
+            CellInfo *swapped = ctx->getBoundBelCell(entry.second);
+            if (swapped != nullptr && !check_cell_delay_limits(swapped))
+                return false;
+        }
+        return true;
+    }
+
+    int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap)
+    {
+        BelId curr = cell->bel;
+        Loc curr_loc = ctx->getBelLocation(curr);
+        int found_count = 0;
+        cell_neighbour_bels[cell->name] = std::unordered_set<BelId>{};
+        for (int dy = -d; dy <= d; dy++) {
+            for (int dx = -d; dx <= d; dx++) {
+                // Go through all the Bels at this location
+                // First, find all bels of the correct type that are either unbound or bound normally
+                // Strongly bound bels are ignored
+                // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros
+                std::vector<BelId> free_bels_at_loc;
+                std::vector<BelId> bound_bels_at_loc;
+                for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) {
+                    if (ctx->getBelType(bel) != cell->type)
+                        continue;
+                    CellInfo *bound = ctx->getBoundBelCell(bel);
+                    if (bound == nullptr) {
+                        free_bels_at_loc.push_back(bel);
+                    } else if (bound->belStrength <= STRENGTH_WEAK && bound->constr_parent == nullptr &&
+                               bound->constr_children.empty()) {
+                        bound_bels_at_loc.push_back(bel);
+                    }
+                }
+                BelId candidate;
+
+                while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) {
+                    BelId try_bel;
+                    if (!free_bels_at_loc.empty()) {
+                        int try_idx = ctx->rng(int(free_bels_at_loc.size()));
+                        try_bel = free_bels_at_loc.at(try_idx);
+                        free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx);
+                    } else {
+                        int try_idx = ctx->rng(int(bound_bels_at_loc.size()));
+                        try_bel = bound_bels_at_loc.at(try_idx);
+                        bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx);
+                    }
+                    if (bel_candidate_cells.count(try_bel) && !allow_swap) {
+                        // Overlap is only allowed if it is with the previous cell (this is handled by removing those
+                        // edges in the graph), or if allow_swap is true to deal with cases where overlap means few
+                        // neighbours are identified
+                        if (bel_candidate_cells.at(try_bel).size() > 1 ||
+                            (bel_candidate_cells.at(try_bel).size() == 1 &&
+                             *(bel_candidate_cells.at(try_bel).begin()) != prev_cell))
+                            continue;
+                    }
+                    // TODO: what else to check here?
+                    candidate = try_bel;
+                    break;
+                }
+
+                if (candidate != BelId()) {
+                    cell_neighbour_bels[cell->name].insert(candidate);
+                    bel_candidate_cells[candidate].insert(cell->name);
+                    // Work out if we need to delete any overlap
+                    std::vector<IdString> overlap;
+                    for (auto other : bel_candidate_cells[candidate])
+                        if (other != cell->name && other != prev_cell)
+                            overlap.push_back(other);
+                    if (overlap.size() > 0)
+                        NPNR_ASSERT(allow_swap);
+                    for (auto ov : overlap) {
+                        bel_candidate_cells[candidate].erase(ov);
+                        cell_neighbour_bels[ov].erase(candidate);
+                    }
+                }
+            }
+        }
+        return found_count;
+    }
+
+    std::vector<std::vector<PortRef *>> find_crit_paths(float crit_thresh, size_t max_count)
+    {
+        std::vector<std::vector<PortRef *>> crit_paths;
+        std::vector<std::pair<NetInfo *, int>> crit_nets;
+        std::vector<IdString> netnames;
+        std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames),
+                       [](const std::pair<const IdString, std::unique_ptr<NetInfo>> &kv) { return kv.first; });
+        ctx->sorted_shuffle(netnames);
+        for (auto net : netnames) {
+            if (crit_nets.size() >= max_count)
+                break;
+            if (!net_crit.count(net))
+                continue;
+            auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end());
+            if (*crit_user > crit_thresh)
+                crit_nets.push_back(
+                        std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin()));
+        }
+
+        auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t {
+            NPNR_ASSERT(port.net != nullptr);
+            for (size_t i = 0; i < port.net->users.size(); i++) {
+                auto &usr = port.net->users.at(i);
+                if (usr.cell == cell && usr.port == port.name)
+                    return i;
+            }
+            NPNR_ASSERT_FALSE("port user not found on net");
+        };
+        std::unordered_set<PortRef *> used_ports;
+
+        for (auto crit_net : crit_nets) {
+
+            if (used_ports.count(&(crit_net.first->users.at(crit_net.second))))
+                continue;
+
+            std::deque<PortRef *> crit_path;
+
+            // FIXME: This will fail badly on combinational loops
+
+            // Iterate backwards following greatest criticality
+            NetInfo *back_cursor = crit_net.first;
+            while (back_cursor != nullptr) {
+                float max_crit = 0;
+                std::pair<NetInfo *, size_t> crit_sink{nullptr, 0};
+                CellInfo *cell = back_cursor->driver.cell;
+                if (cell == nullptr)
+                    break;
+                for (auto port : cell->ports) {
+                    if (port.second.type != PORT_IN)
+                        continue;
+                    NetInfo *pn = port.second.net;
+                    if (pn == nullptr)
+                        continue;
+                    if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
+                        continue;
+                    int ccount;
+                    DelayInfo combDelay;
+                    TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
+                    if (tpclass != TMG_COMB_INPUT)
+                        continue;
+                    bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay);
+                    if (!is_path)
+                        continue;
+                    size_t user_idx = port_user_index(cell, port.second);
+                    float usr_crit = net_crit.at(pn->name).criticality.at(user_idx);
+                    if (used_ports.count(&(pn->users.at(user_idx))))
+                        continue;
+                    if (usr_crit >= max_crit) {
+                        max_crit = usr_crit;
+                        crit_sink = std::make_pair(pn, user_idx);
+                    }
+                }
+
+                if (crit_sink.first != nullptr) {
+                    crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second)));
+                    used_ports.insert(&(crit_sink.first->users.at(crit_sink.second)));
+                }
+                back_cursor = crit_sink.first;
+            }
+            // Iterate forwards following greatest criticiality
+            PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second));
+            while (fwd_cursor != nullptr) {
+                crit_path.push_back(fwd_cursor);
+                float max_crit = 0;
+                std::pair<NetInfo *, size_t> crit_sink{nullptr, 0};
+                CellInfo *cell = fwd_cursor->cell;
+                for (auto port : cell->ports) {
+                    if (port.second.type != PORT_OUT)
+                        continue;
+                    NetInfo *pn = port.second.net;
+                    if (pn == nullptr)
+                        continue;
+                    if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
+                        continue;
+                    int ccount;
+                    DelayInfo combDelay;
+                    TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
+                    if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT)
+                        continue;
+                    bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay);
+                    if (!is_path)
+                        continue;
+                    auto &crits = net_crit.at(pn->name).criticality;
+                    for (size_t i = 0; i < crits.size(); i++) {
+                        if (used_ports.count(&(pn->users.at(i))))
+                            continue;
+                        if (crits.at(i) >= max_crit) {
+                            max_crit = crits.at(i);
+                            crit_sink = std::make_pair(pn, i);
+                        }
+                    }
+                }
+                if (crit_sink.first != nullptr) {
+                    fwd_cursor = &(crit_sink.first->users.at(crit_sink.second));
+                    used_ports.insert(&(crit_sink.first->users.at(crit_sink.second)));
+                } else {
+                    fwd_cursor = nullptr;
+                }
+            }
+
+            std::vector<PortRef *> crit_path_vec;
+            std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec));
+            crit_paths.push_back(crit_path_vec);
+        }
+
+        return crit_paths;
+    }
+
+    void optimise_path(std::vector<PortRef *> &path)
+    {
+        path_cells.clear();
+        cell_neighbour_bels.clear();
+        bel_candidate_cells.clear();
+        if (ctx->debug)
+            log_info("Optimising the following path: \n");
+
+        auto front_port = path.front();
+        NetInfo *front_net = front_port->cell->ports.at(front_port->port).net;
+        if (front_net != nullptr && front_net->driver.cell != nullptr) {
+            auto front_cell = front_net->driver.cell;
+            if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) &&
+                front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) {
+                path_cells.push_back(front_cell->name);
+            }
+        }
+
+        for (auto port : path) {
+            if (ctx->debug) {
+                float crit = 0;
+                NetInfo *pn = port->cell->ports.at(port->port).net;
+                if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty())
+                    for (size_t i = 0; i < pn->users.size(); i++)
+                        if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port)
+                            crit = net_crit.at(pn->name).criticality.at(i);
+                log_info("    %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx),
+                         ctx->getBelName(port->cell->bel).c_str(ctx), crit);
+            }
+            if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end())
+                continue;
+            if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) ||
+                port->cell->constr_parent != nullptr || !port->cell->constr_children.empty())
+                continue;
+            if (ctx->debug)
+                log_info("        can move\n");
+            path_cells.push_back(port->cell->name);
+        }
+
+        if (path_cells.size() < 2) {
+            if (ctx->debug) {
+                log_info("Too few moveable cells; skipping path\n");
+                log_break();
+            }
+
+            return;
+        }
+
+        // Calculate original delay before touching anything
+        delay_t original_delay = 0;
+
+        for (size_t i = 0; i < path.size(); i++) {
+            NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net;
+            for (size_t j = 0; j < pn->users.size(); j++) {
+                auto &usr = pn->users.at(j);
+                if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) {
+                    original_delay += ctx->predictDelay(pn, usr);
+                    break;
+                }
+            }
+        }
+
+        IdString last_cell;
+        const int d = 2; // FIXME: how to best determine d
+        for (auto cell : path_cells) {
+            // FIXME: when should we allow swapping due to a lack of candidates
+            find_neighbours(ctx->cells[cell].get(), last_cell, d, false);
+            last_cell = cell;
+        }
+
+        if (ctx->debug) {
+            for (auto cell : path_cells) {
+                log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx),
+                         ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx));
+                for (auto neigh : cell_neighbour_bels.at(cell)) {
+                    log_info("    %s\n", ctx->getBelName(neigh).c_str(ctx));
+                }
+            }
+        }
+
+        // Actual BFS path optimisation algorithm
+        std::unordered_map<IdString, std::unordered_map<BelId, delay_t>> cumul_costs;
+        std::unordered_map<std::pair<IdString, BelId>, std::pair<IdString, BelId>> backtrace;
+        std::queue<std::pair<int, BelId>> visit;
+        std::unordered_set<std::pair<int, BelId>> to_visit;
+
+        for (auto startbel : cell_neighbour_bels[path_cells.front()]) {
+            // Swap for legality check
+            CellInfo *cell = ctx->cells.at(path_cells.front()).get();
+            BelId origBel = cell_swap_bel(cell, startbel);
+            std::vector<std::pair<CellInfo *, BelId>> move{std::make_pair(cell, origBel)};
+            if (acceptable_move(move)) {
+                auto entry = std::make_pair(0, startbel);
+                visit.push(entry);
+                cumul_costs[path_cells.front()][startbel] = 0;
+            }
+            // Swap back
+            cell_swap_bel(cell, origBel);
+        }
+
+        while (!visit.empty()) {
+            auto entry = visit.front();
+            visit.pop();
+            auto cellname = path_cells.at(entry.first);
+            if (entry.first == int(path_cells.size()) - 1)
+                continue;
+            std::vector<std::pair<CellInfo *, BelId>> move;
+            // Apply the entire backtrace for accurate legality and delay checks
+            // This is probably pretty expensive (but also probably pales in comparison to the number of swaps
+            // SA will make...)
+            std::vector<std::pair<IdString, BelId>> route_to_entry;
+            auto cursor = std::make_pair(cellname, entry.second);
+            route_to_entry.push_back(cursor);
+            while (backtrace.count(cursor)) {
+                cursor = backtrace.at(cursor);
+                route_to_entry.push_back(cursor);
+            }
+            for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) {
+                CellInfo *cell = ctx->cells.at(rt_entry.first).get();
+                BelId origBel = cell_swap_bel(cell, rt_entry.second);
+                move.push_back(std::make_pair(cell, origBel));
+            }
+
+            // Have a look at where we can travel from here
+            for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) {
+                // Edges between overlapping bels are deleted
+                if (neighbour == entry.second)
+                    continue;
+                // Experimentally swap the next path cell onto the neighbour bel we are trying
+                IdString ncname = path_cells.at(entry.first + 1);
+                CellInfo *next_cell = ctx->cells.at(ncname).get();
+                BelId origBel = cell_swap_bel(next_cell, neighbour);
+                move.push_back(std::make_pair(next_cell, origBel));
+
+                delay_t total_delay = 0;
+
+                for (size_t i = 0; i < path.size(); i++) {
+                    NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net;
+                    for (size_t j = 0; j < pn->users.size(); j++) {
+                        auto &usr = pn->users.at(j);
+                        if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) {
+                            total_delay += ctx->predictDelay(pn, usr);
+                            break;
+                        }
+                    }
+                    if (path.at(i)->cell == next_cell)
+                        break;
+                }
+
+                // First, check if the move is actually worthwhile from a delay point of view before the expensive
+                // legality check
+                if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) ||
+                    cumul_costs.at(ncname).at(neighbour) > total_delay) {
+                    // Now check that the swaps we have made to get here are legal and meet max delay requirements
+                    if (acceptable_move(move)) {
+                        cumul_costs[ncname][neighbour] = total_delay;
+                        backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second);
+                        if (!to_visit.count(std::make_pair(entry.first + 1, neighbour)))
+                            visit.push(std::make_pair(entry.first + 1, neighbour));
+                    }
+                }
+                // Revert the experimental swap
+                cell_swap_bel(move.back().first, move.back().second);
+                move.pop_back();
+            }
+
+            // Revert move by swapping cells back to their original order
+            // Execute swaps in reverse order to how we made them originally
+            for (auto move_entry : boost::adaptors::reverse(move)) {
+                cell_swap_bel(move_entry.first, move_entry.second);
+            }
+        }
+
+        // Did we find a solution??
+        if (cumul_costs.count(path_cells.back())) {
+            // Find the end position with the lowest total delay
+            auto &end_options = cumul_costs.at(path_cells.back());
+            auto lowest = std::min_element(end_options.begin(), end_options.end(),
+                                           [](const std::pair<BelId, delay_t> &a, const std::pair<BelId, delay_t> &b) {
+                                               return a.second < b.second;
+                                           });
+            NPNR_ASSERT(lowest != end_options.end());
+
+            std::vector<std::pair<IdString, BelId>> route_to_solution;
+            auto cursor = std::make_pair(path_cells.back(), lowest->first);
+            route_to_solution.push_back(cursor);
+            while (backtrace.count(cursor)) {
+                cursor = backtrace.at(cursor);
+                route_to_solution.push_back(cursor);
+            }
+            if (ctx->debug)
+                log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n",
+                         ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay));
+            for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) {
+                CellInfo *cell = ctx->cells.at(rt_entry.first).get();
+                cell_swap_bel(cell, rt_entry.second);
+                if (ctx->debug)
+                    log_info("    %s at %s\n", rt_entry.first.c_str(ctx), ctx->getBelName(rt_entry.second).c_str(ctx));
+            }
+
+        } else {
+            if (ctx->debug)
+                log_info("Solution was not found\n");
+        }
+        if (ctx->debug)
+            log_break();
+    }
+
+    // Current candidate Bels for cells (linked in both direction>
+    std::vector<IdString> path_cells;
+    std::unordered_map<IdString, std::unordered_set<BelId>> cell_neighbour_bels;
+    std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells;
+    // Map cell ports to net delay limit
+    std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay;
+    // Criticality data from timing analysis
+    NetCriticalityMap net_crit;
+    Context *ctx;
+    TimingOptCfg cfg;
+};
+
+bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); }
+
+NEXTPNR_NAMESPACE_END
diff --git a/common/timing_opt.h b/common/timing_opt.h
new file mode 100644
index 00000000..ceb35c71
--- /dev/null
+++ b/common/timing_opt.h
@@ -0,0 +1,37 @@
+/*
+ *  nextpnr -- Next Generation Place and Route
+ *
+ *  Copyright (C) 2018  David Shah <david@symbioticeda.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "nextpnr.h"
+#include "settings.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct TimingOptCfg : public Settings
+{
+    TimingOptCfg(Context *ctx) : Settings(ctx) {}
+
+    // The timing optimiser will *only* optimise cells of these types
+    // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense
+    // for other cell types
+    std::unordered_set<IdString> cellTypes;
+};
+
+extern bool timing_opt(Context *ctx, TimingOptCfg cfg);
+
+NEXTPNR_NAMESPACE_END
diff --git a/ice40/arch.cc b/ice40/arch.cc
index ada78020..8f52987c 100644
--- a/ice40/arch.cc
+++ b/ice40/arch.cc
@@ -26,6 +26,7 @@
 #include "nextpnr.h"
 #include "placer1.h"
 #include "router1.h"
+#include "timing_opt.h"
 #include "util.h"
 
 NEXTPNR_NAMESPACE_BEGIN
@@ -626,7 +627,18 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay
 
 // -----------------------------------------------------------------------
 
-bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); }
+bool Arch::place()
+{
+    if (!placer1(getCtx(), Placer1Cfg(getCtx())))
+        return false;
+    if (bool_or_default(settings, id("opt_timing"), false)) {
+        TimingOptCfg tocfg(getCtx());
+        tocfg.cellTypes.insert(id_ICESTORM_LC);
+        return timing_opt(getCtx(), tocfg);
+    } else {
+        return true;
+    }
+}
 
 bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); }
 
diff --git a/ice40/main.cc b/ice40/main.cc
index 4b6a9e42..543bd229 100644
--- a/ice40/main.cc
+++ b/ice40/main.cc
@@ -68,6 +68,7 @@ po::options_description Ice40CommandHandler::getArchOptions()
     specific.add_options()("promote-logic",
                            "enable promotion of 'logic' globals (in addition to clk/ce/sr by default)");
     specific.add_options()("no-promote-globals", "disable all global promotion");
+    specific.add_options()("opt-timing", "run post-placement timing optimisation pass (experimental)");
     specific.add_options()("tmfuzz", "run path delay estimate fuzzer");
     return specific;
 }
@@ -161,6 +162,8 @@ std::unique_ptr<Context> Ice40CommandHandler::createContext()
         ctx->settings[ctx->id("promote_logic")] = "1";
     if (vm.count("no-promote-globals"))
         ctx->settings[ctx->id("no_promote_globals")] = "1";
+    if (vm.count("opt-timing"))
+        ctx->settings[ctx->id("opt_timing")] = "1";
     return ctx;
 }
 
diff --git a/ice40/picorv32_benchmark.py b/ice40/picorv32_benchmark.py
index a4ec581e..5e4fc2e1 100755
--- a/ice40/picorv32_benchmark.py
+++ b/ice40/picorv32_benchmark.py
@@ -22,7 +22,7 @@ for i in range(num_runs):
         ascfile = "picorv32_work/picorv32_s{}.asc".format(run)
         if path.exists(ascfile):
             os.remove(ascfile)
-        result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "70"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
+        result = subprocess.run(["../nextpnr-ice40", "--hx8k", "--seed", str(run), "--json", "picorv32.json", "--asc", ascfile, "--freq", "40", "--opt-timing"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
         if result.returncode != 0:
             print("Run {} failed!".format(run))
         else:
author	David Shah <davey1576@gmail.com>	2018-12-07 21:19:41 +0000
committer	GitHub <noreply@github.com>	2018-12-07 21:19:41 +0000
commit	d790d0bb91261939a784fe0f86ce6fa2f6245e55 (patch)
tree	5634ef0c91bda07c15a25e42f06194e9ecbfbb51
parent	a9e1fab9fcdb0493343a8ba23b471255544f10ea (diff)
parent	b732e42fa312b83bee6c122d69e0a171afca779c (diff)
download	nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.tar.gz nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.tar.bz2 nextpnr-d790d0bb91261939a784fe0f86ce6fa2f6245e55.zip