diff options
Diffstat (limited to 'common')
| -rw-r--r-- | common/chain_utils.h | 3 | ||||
| -rw-r--r-- | common/command.cc | 67 | ||||
| -rw-r--r-- | common/design_utils.cc | 21 | ||||
| -rw-r--r-- | common/design_utils.h | 3 | ||||
| -rw-r--r-- | common/nextpnr.cc | 32 | ||||
| -rw-r--r-- | common/nextpnr.h | 9 | ||||
| -rw-r--r-- | common/place_common.cc | 12 | ||||
| -rw-r--r-- | common/place_common.h | 4 | ||||
| -rw-r--r-- | common/placer1.cc | 984 | ||||
| -rw-r--r-- | common/placer1.h | 4 | ||||
| -rw-r--r-- | common/placer_heap.cc | 1545 | ||||
| -rw-r--r-- | common/placer_heap.h | 47 | ||||
| -rw-r--r-- | common/pybindings.cc | 62 | ||||
| -rw-r--r-- | common/pycontainers.h | 14 | ||||
| -rw-r--r-- | common/pywrappers.h | 137 | ||||
| -rw-r--r-- | common/router1.cc | 4 | ||||
| -rw-r--r-- | common/settings.h | 27 | ||||
| -rw-r--r-- | common/timing.cc | 235 | ||||
| -rw-r--r-- | common/timing.h | 13 | ||||
| -rw-r--r-- | common/timing_opt.cc | 626 | ||||
| -rw-r--r-- | common/timing_opt.h | 37 | 
21 files changed, 3660 insertions, 226 deletions
| diff --git a/common/chain_utils.h b/common/chain_utils.h index b783e30b..300d96a1 100644 --- a/common/chain_utils.h +++ b/common/chain_utils.h @@ -51,7 +51,8 @@ std::vector<CellChain> find_chains(const Context *ctx, F1 cell_type_predicate, F              CellChain chain;              CellInfo *end = start;              while (end != nullptr) { -                chain.cells.push_back(end); +                if (chained.insert(end->name).second) +                    chain.cells.push_back(end);                  end = get_next(ctx, end);              }              if (chain.cells.size() >= min_length) { diff --git a/common/command.cc b/common/command.cc index d332375a..49081e72 100644 --- a/common/command.cc +++ b/common/command.cc @@ -27,6 +27,7 @@  #include "pybindings.h"  #endif +#include <boost/algorithm/string/join.hpp>  #include <boost/filesystem/convenience.hpp>  #include <boost/program_options.hpp>  #include <fstream> @@ -77,6 +78,20 @@ bool CommandHandler::executeBeforeContext()          return true;      }      validate(); + +    if (vm.count("quiet")) { +        log_streams.push_back(std::make_pair(&std::cerr, LogLevel::WARNING_MSG)); +    } else { +        log_streams.push_back(std::make_pair(&std::cerr, LogLevel::LOG_MSG)); +    } + +    if (vm.count("log")) { +        std::string logfilename = vm["log"].as<std::string>(); +        logfile.open(logfilename); +        if (!logfile.is_open()) +            log_error("Failed to open log file '%s' for writing.\n", logfilename.c_str()); +        log_streams.push_back(std::make_pair(&logfile, LogLevel::LOG_MSG)); +    }      return false;  } @@ -106,13 +121,26 @@ po::options_description CommandHandler::getGeneralOptions()      general.add_options()("json", po::value<std::string>(), "JSON design file to ingest");      general.add_options()("seed", po::value<int>(), "seed value for random number generator");      general.add_options()("randomize-seed,r", "randomize seed value for random number generator"); + +    general.add_options()( +            "placer", po::value<std::string>(), +            std::string("placer algorithm to use; available: " + boost::algorithm::join(Arch::availablePlacers, ", ") + +                        "; default: " + Arch::defaultPlacer) +                    .c_str()); +      general.add_options()("slack_redist_iter", po::value<int>(), "number of iterations between slack redistribution");      general.add_options()("cstrweight", po::value<float>(), "placer weighting for relative constraint satisfaction"); +    general.add_options()("starttemp", po::value<float>(), "placer SA start temperature"); +    general.add_options()("placer-budgets", "use budget rather than criticality in placer timing weights"); +      general.add_options()("pack-only", "pack design only without placement or routing"); +    general.add_options()("ignore-loops", "ignore combinational loops in timing analysis"); +      general.add_options()("version,V", "show version");      general.add_options()("test", "check architecture database integrity");      general.add_options()("freq", po::value<double>(), "set target frequency for design in MHz"); +    general.add_options()("timing-allow-fail", "allow timing to fail in design");      general.add_options()("no-tmdriv", "disable timing-driven placement");      general.add_options()("save", po::value<std::string>(), "project file to write");      general.add_options()("load", po::value<std::string>(), "project file to read"); @@ -130,20 +158,6 @@ void CommandHandler::setupContext(Context *ctx)          ctx->debug = true;      } -    if (vm.count("quiet")) { -        log_streams.push_back(std::make_pair(&std::cerr, LogLevel::WARNING_MSG)); -    } else { -        log_streams.push_back(std::make_pair(&std::cerr, LogLevel::LOG_MSG)); -    } - -    if (vm.count("log")) { -        std::string logfilename = vm["log"].as<std::string>(); -        logfile = std::ofstream(logfilename); -        if (!logfile) -            log_error("Failed to open log file '%s' for writing.\n", logfilename.c_str()); -        log_streams.push_back(std::make_pair(&logfile, LogLevel::LOG_MSG)); -    } -      if (vm.count("force")) {          ctx->force = true;      } @@ -172,10 +186,35 @@ void CommandHandler::setupContext(Context *ctx)          }      } +    if (vm.count("ignore-loops")) { +        settings->set("timing/ignoreLoops", true); +    } + +    if (vm.count("timing-allow-fail")) { +        settings->set("timing/allowFail", true); +    } + +    if (vm.count("placer")) { +        std::string placer = vm["placer"].as<std::string>(); +        if (std::find(Arch::availablePlacers.begin(), Arch::availablePlacers.end(), placer) == +            Arch::availablePlacers.end()) +            log_error("Placer algorithm '%s' is not supported (available options: %s)\n", placer.c_str(), +                      boost::algorithm::join(Arch::availablePlacers, ", ").c_str()); +        settings->set("placer", placer); +    } else { +        settings->set("placer", Arch::defaultPlacer); +    } +      if (vm.count("cstrweight")) {          settings->set("placer1/constraintWeight", vm["cstrweight"].as<float>());      } +    if (vm.count("starttemp")) { +        settings->set("placer1/startTemp", vm["starttemp"].as<float>()); +    } +    if (vm.count("placer-budgets")) { +        settings->set("placer1/budgetBased", true); +    }      if (vm.count("freq")) {          auto freq = vm["freq"].as<double>();          if (freq > 0) diff --git a/common/design_utils.cc b/common/design_utils.cc index a0b87764..bdf5ca5c 100644 --- a/common/design_utils.cc +++ b/common/design_utils.cc @@ -27,6 +27,8 @@ NEXTPNR_NAMESPACE_BEGIN  void replace_port(CellInfo *old_cell, IdString old_name, CellInfo *rep_cell, IdString rep_name)  { +    if (!old_cell->ports.count(old_name)) +        return;      PortInfo &old = old_cell->ports.at(old_name);      PortInfo &rep = rep_cell->ports.at(rep_name);      NPNR_ASSERT(old.type == rep.type); @@ -107,6 +109,8 @@ void disconnect_port(const Context *ctx, CellInfo *cell, IdString port_name)                                                   return user.cell == cell && user.port == port_name;                                               }),                                port.net->users.end()); +        if (port.net->driver.cell == cell && port.net->driver.port == port_name) +            port.net->driver.cell = nullptr;      }  } @@ -125,4 +129,21 @@ void connect_ports(Context *ctx, CellInfo *cell1, IdString port1_name, CellInfo      connect_port(ctx, port1.net, cell2, port2_name);  } +void rename_port(Context *ctx, CellInfo *cell, IdString old_name, IdString new_name) +{ +    if (!cell->ports.count(old_name)) +        return; +    PortInfo pi = cell->ports.at(old_name); +    if (pi.net != nullptr) { +        if (pi.net->driver.cell == cell && pi.net->driver.port == old_name) +            pi.net->driver.port = new_name; +        for (auto &usr : pi.net->users) +            if (usr.cell == cell && usr.port == old_name) +                usr.port = new_name; +    } +    cell->ports.erase(old_name); +    pi.name = new_name; +    cell->ports[new_name] = pi; +} +  NEXTPNR_NAMESPACE_END diff --git a/common/design_utils.h b/common/design_utils.h index 8a42d21f..3eb9024f 100644 --- a/common/design_utils.h +++ b/common/design_utils.h @@ -91,6 +91,9 @@ void disconnect_port(const Context *ctx, CellInfo *cell, IdString port_name);  // Connect two ports together  void connect_ports(Context *ctx, CellInfo *cell1, IdString port1_name, CellInfo *cell2, IdString port2_name); +// Rename a port if it exists on a cell +void rename_port(Context *ctx, CellInfo *cell, IdString old_name, IdString new_name); +  void print_utilisation(const Context *ctx);  NEXTPNR_NAMESPACE_END diff --git a/common/nextpnr.cc b/common/nextpnr.cc index bb941d3d..daaadf28 100644 --- a/common/nextpnr.cc +++ b/common/nextpnr.cc @@ -221,6 +221,9 @@ delay_t Context::getNetinfoRouteDelay(const NetInfo *net_info, const PortRef &us          return 0;  #endif +    if (net_info->wires.empty()) +        return predictDelay(net_info, user_info); +      WireId src_wire = getNetinfoSourceWire(net_info);      if (src_wire == WireId())          return 0; @@ -421,4 +424,33 @@ void BaseCtx::addClock(IdString net, float freq)      }  } +void BaseCtx::createRectangularRegion(IdString name, int x0, int y0, int x1, int y1) +{ +    std::unique_ptr<Region> new_region(new Region()); +    new_region->name = name; +    new_region->constr_bels = true; +    new_region->constr_pips = false; +    new_region->constr_wires = false; +    for (int x = x0; x <= x1; x++) { +        for (int y = y0; y <= y1; y++) { +            for (auto bel : getCtx()->getBelsByTile(x, y)) +                new_region->bels.insert(bel); +        } +    } +    region[name] = std::move(new_region); +} +void BaseCtx::addBelToRegion(IdString name, BelId bel) { region[name]->bels.insert(bel); } +void BaseCtx::constrainCellToRegion(IdString cell, IdString region_name) +{ +    cells[cell]->region = region[region_name].get(); +} +DecalXY BaseCtx::constructDecalXY(DecalId decal, float x, float y) +{ +    DecalXY dxy; +    dxy.decal = decal; +    dxy.x = x; +    dxy.y = y; +    return dxy; +} +  NEXTPNR_NAMESPACE_END diff --git a/common/nextpnr.h b/common/nextpnr.h index d58ae529..fc49300e 100644 --- a/common/nextpnr.h +++ b/common/nextpnr.h @@ -181,6 +181,9 @@ struct GraphicElement      float x1 = 0, y1 = 0, x2 = 0, y2 = 0, z = 0;      std::string text; +    GraphicElement(){}; +    GraphicElement(type_t type, style_t style, float x1, float y1, float x2, float y2, float z) +            : type(type), style(style), x1(x1), y1(y1), x2(x2), y2(y2), z(z){};  };  struct Loc @@ -637,6 +640,12 @@ struct BaseCtx      // Intended to simplify Python API      void addClock(IdString net, float freq); +    void createRectangularRegion(IdString name, int x0, int y0, int x1, int y1); +    void addBelToRegion(IdString name, BelId bel); +    void constrainCellToRegion(IdString cell, IdString region_name); + +    // Workaround for lack of wrappable constructors +    DecalXY constructDecalXY(DecalId decal, float x, float y);  };  NEXTPNR_NAMESPACE_END diff --git a/common/place_common.cc b/common/place_common.cc index b3eb4267..73a320d0 100644 --- a/common/place_common.cc +++ b/common/place_common.cc @@ -304,7 +304,7 @@ class ConstraintLegaliseWorker      // Set the strength to locked on all cells in chain      void lockdown_chain(CellInfo *root)      { -        root->belStrength = STRENGTH_LOCKED; +        root->belStrength = STRENGTH_STRONG;          for (auto child : root->constr_children)              lockdown_chain(child);      } @@ -380,7 +380,7 @@ class ConstraintLegaliseWorker                                  rippedCells.insert(confl_cell->name);                              }                          } -                        ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_LOCKED); +                        ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_STRONG);                          rippedCells.erase(cp.first);                      }                      for (auto cp : solution) { @@ -529,4 +529,12 @@ int get_constraints_distance(const Context *ctx, const CellInfo *cell)      return dist;  } +bool check_cell_bel_region(const CellInfo *cell, BelId bel) +{ +    if (cell->region != nullptr && cell->region->constr_bels && !cell->region->bels.count(bel)) +        return false; +    else +        return true; +} +  NEXTPNR_NAMESPACE_END diff --git a/common/place_common.h b/common/place_common.h index 79dec067..fa5ce4c2 100644 --- a/common/place_common.h +++ b/common/place_common.h @@ -49,6 +49,10 @@ bool legalise_relative_constraints(Context *ctx);  // Get the total distance from satisfied constraints for a cell  int get_constraints_distance(const Context *ctx, const CellInfo *cell); + +// Check that a Bel is within the region for a cell +bool check_cell_bel_region(const CellInfo *cell, BelId bel); +  NEXTPNR_NAMESPACE_END  #endif diff --git a/common/placer1.cc b/common/placer1.cc index b42ef2ff..a8ddd8a6 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -24,6 +24,8 @@  #include "placer1.h"  #include <algorithm>  #include <boost/lexical_cast.hpp> +#include <boost/range/adaptor/reversed.hpp> +#include <chrono>  #include <cmath>  #include <iostream>  #include <limits> @@ -42,10 +44,33 @@  #include "timing.h"  #include "util.h" +namespace std { +template <> struct hash<std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, std::size_t>> +{ +    std::size_t operator()(const std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, std::size_t> &idp) const noexcept +    { +        std::size_t seed = 0; +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.first)); +        boost::hash_combine(seed, hash<std::size_t>()(idp.second)); +        return seed; +    } +}; +} // namespace std +  NEXTPNR_NAMESPACE_BEGIN  class SAPlacer  { +  private: +    struct BoundingBox +    { +        // Actual bounding box +        int x0 = 0, x1 = 0, y0 = 0, y1 = 0; +        // Number of cells at each extremity +        int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; +        wirelen_t hpwl() const { return wirelen_t((x1 - x0) + (y1 - y0)); } +    }; +    public:      SAPlacer(Context *ctx, Placer1Cfg cfg) : ctx(ctx), cfg(cfg)      { @@ -77,13 +102,41 @@ class SAPlacer          }          diameter = std::max(max_x, max_y) + 1; -        costs.resize(ctx->nets.size()); +        net_bounds.resize(ctx->nets.size()); +        net_arc_tcost.resize(ctx->nets.size());          old_udata.reserve(ctx->nets.size()); +        net_by_udata.reserve(ctx->nets.size());          decltype(NetInfo::udata) n = 0;          for (auto &net : ctx->nets) {              old_udata.emplace_back(net.second->udata); +            net_arc_tcost.at(n).resize(net.second->users.size());              net.second->udata = n++; +            net_by_udata.push_back(net.second.get());          } +        for (auto ®ion : sorted(ctx->region)) { +            Region *r = region.second; +            BoundingBox bb; +            if (r->constr_bels) { +                bb.x0 = std::numeric_limits<int>::max(); +                bb.x1 = std::numeric_limits<int>::min(); +                bb.y0 = std::numeric_limits<int>::max(); +                bb.y1 = std::numeric_limits<int>::min(); +                for (auto bel : r->bels) { +                    Loc loc = ctx->getBelLocation(bel); +                    bb.x0 = std::min(bb.x0, loc.x); +                    bb.x1 = std::max(bb.x1, loc.x); +                    bb.y0 = std::min(bb.y0, loc.y); +                    bb.y1 = std::max(bb.y1, loc.y); +                } +            } else { +                bb.x0 = 0; +                bb.y0 = 0; +                bb.x1 = max_x; +                bb.y1 = max_y; +            } +            region_bounds[r->name] = bb; +        } +        build_port_index();      }      ~SAPlacer() @@ -92,95 +145,122 @@ class SAPlacer              net.second->udata = old_udata[net.second->udata];      } -    bool place() +    bool place(bool refine = false)      {          log_break();          ctx->lock();          size_t placed_cells = 0; -        // Initial constraints placer -        for (auto &cell_entry : ctx->cells) { -            CellInfo *cell = cell_entry.second.get(); -            auto loc = cell->attrs.find(ctx->id("BEL")); -            if (loc != cell->attrs.end()) { -                std::string loc_name = loc->second; -                BelId bel = ctx->getBelByName(ctx->id(loc_name)); -                if (bel == BelId()) { -                    log_error("No Bel named \'%s\' located for " -                              "this chip (processing BEL attribute on \'%s\')\n", -                              loc_name.c_str(), cell->name.c_str(ctx)); -                } +        std::vector<CellInfo *> autoplaced; +        std::vector<CellInfo *> chain_basis; +        if (!refine) { +            // Initial constraints placer +            for (auto &cell_entry : ctx->cells) { +                CellInfo *cell = cell_entry.second.get(); +                auto loc = cell->attrs.find(ctx->id("BEL")); +                if (loc != cell->attrs.end()) { +                    std::string loc_name = loc->second; +                    BelId bel = ctx->getBelByName(ctx->id(loc_name)); +                    if (bel == BelId()) { +                        log_error("No Bel named \'%s\' located for " +                                  "this chip (processing BEL attribute on \'%s\')\n", +                                  loc_name.c_str(), cell->name.c_str(ctx)); +                    } -                IdString bel_type = ctx->getBelType(bel); -                if (bel_type != cell->type) { -                    log_error("Bel \'%s\' of type \'%s\' does not match cell " -                              "\'%s\' of type \'%s\'\n", -                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); -                } -                if (!ctx->isValidBelForCell(cell, bel)) { -                    log_error("Bel \'%s\' of type \'%s\' is not valid for cell " -                              "\'%s\' of type \'%s\'\n", -                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); -                } +                    IdString bel_type = ctx->getBelType(bel); +                    if (bel_type != cell->type) { +                        log_error("Bel \'%s\' of type \'%s\' does not match cell " +                                  "\'%s\' of type \'%s\'\n", +                                  loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); +                    } +                    if (!ctx->isValidBelForCell(cell, bel)) { +                        log_error("Bel \'%s\' of type \'%s\' is not valid for cell " +                                  "\'%s\' of type \'%s\'\n", +                                  loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); +                    } -                auto bound_cell = ctx->getBoundBelCell(bel); -                if (bound_cell) { -                    log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", -                              cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); -                } +                    auto bound_cell = ctx->getBoundBelCell(bel); +                    if (bound_cell) { +                        log_error( +                                "Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", +                                cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); +                    } -                ctx->bindBel(bel, cell, STRENGTH_USER); -                locked_bels.insert(bel); -                placed_cells++; +                    ctx->bindBel(bel, cell, STRENGTH_USER); +                    locked_bels.insert(bel); +                    placed_cells++; +                }              } -        } -        int constr_placed_cells = placed_cells; -        log_info("Placed %d cells based on constraints.\n", int(placed_cells)); -        ctx->yield(); +            int constr_placed_cells = placed_cells; +            log_info("Placed %d cells based on constraints.\n", int(placed_cells)); +            ctx->yield(); -        // Sort to-place cells for deterministic initial placement -        std::vector<CellInfo *> autoplaced; -        for (auto &cell : ctx->cells) { -            CellInfo *ci = cell.second.get(); -            if (ci->bel == BelId()) { -                autoplaced.push_back(cell.second.get()); -            } -        } -        std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; }); -        ctx->shuffle(autoplaced); +            // Sort to-place cells for deterministic initial placement -        // Place cells randomly initially -        log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size())); +            for (auto &cell : ctx->cells) { +                CellInfo *ci = cell.second.get(); +                if (ci->bel == BelId()) { +                    autoplaced.push_back(cell.second.get()); +                } +            } +            std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; }); +            ctx->shuffle(autoplaced); +            auto iplace_start = std::chrono::high_resolution_clock::now(); +            // Place cells randomly initially +            log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size())); -        for (auto cell : autoplaced) { -            place_initial(cell); -            placed_cells++; -            if ((placed_cells - constr_placed_cells) % 500 == 0) +            for (auto cell : autoplaced) { +                place_initial(cell); +                placed_cells++; +                if ((placed_cells - constr_placed_cells) % 500 == 0) +                    log_info("  initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), +                             int(autoplaced.size())); +            } +            if ((placed_cells - constr_placed_cells) % 500 != 0)                  log_info("  initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells),                           int(autoplaced.size())); +            if (cfg.budgetBased && ctx->slack_redist_iter > 0) +                assign_budget(ctx); +            ctx->yield(); +            auto iplace_end = std::chrono::high_resolution_clock::now(); +            log_info("Initial placement time %.02fs\n", +                     std::chrono::duration<float>(iplace_end - iplace_start).count()); +            log_info("Running simulated annealing placer.\n"); +        } else { +            for (auto &cell : ctx->cells) { +                CellInfo *ci = cell.second.get(); +                if (ci->belStrength > STRENGTH_STRONG) +                    continue; +                else if (ci->constr_parent != nullptr) +                    continue; +                else if (!ci->constr_children.empty() || ci->constr_z != ci->UNCONSTR) +                    chain_basis.push_back(ci); +                else +                    autoplaced.push_back(ci); +            } +            require_legal = false; +            diameter = 3; +            log_info("Running simulated annealing placer for refinement.\n");          } -        if ((placed_cells - constr_placed_cells) % 500 != 0) -            log_info("  initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), -                     int(autoplaced.size())); -        if (ctx->slack_redist_iter > 0) -            assign_budget(ctx); -        ctx->yield(); +        auto saplace_start = std::chrono::high_resolution_clock::now(); -        log_info("Running simulated annealing placer.\n"); +        // Invoke timing analysis to obtain criticalities +        if (!cfg.budgetBased) +            get_criticalities(ctx, &net_crit); -        // Calculate metric after initial placement -        curr_metric = 0; -        curr_tns = 0; -        for (auto &net : ctx->nets) { -            wirelen_t wl = get_net_metric(ctx, net.second.get(), MetricType::COST, curr_tns); -            costs[net.second->udata] = CostChange{wl, -1}; -            curr_metric += wl; -        } +        // Calculate costs after initial placement +        setup_costs(); +        moveChange.init(this); +        curr_wirelen_cost = total_wirelen_cost(); +        curr_timing_cost = total_timing_cost(); +        last_wirelen_cost = curr_wirelen_cost; +        last_timing_cost = curr_timing_cost; + +        wirelen_t avg_wirelen = curr_wirelen_cost; +        wirelen_t min_wirelen = curr_wirelen_cost;          int n_no_progress = 0; -        wirelen_t min_metric = curr_metric; -        double avg_metric = curr_metric; -        temp = 10000; +        temp = refine ? 1e-7 : cfg.startTemp;          // Main simulated annealing loop          for (int iter = 1;; iter++) { @@ -188,9 +268,9 @@ class SAPlacer              improved = false;              if (iter % 5 == 0 || iter == 1) -                log_info("  at iteration #%d: temp = %f, cost = " -                         "%.0f, est tns = %.02fns\n", -                         iter, temp, double(curr_metric), curr_tns); +                log_info("  at iteration #%d: temp = %f, timing cost = " +                         "%.0f, wirelen = %.0f\n", +                         iter, temp, double(curr_timing_cost), double(curr_wirelen_cost));              for (int m = 0; m < 15; ++m) {                  // Loop through all automatically placed cells @@ -202,10 +282,35 @@ class SAPlacer                      if (try_bel != BelId() && try_bel != cell->bel)                          try_swap_position(cell, try_bel);                  } +                // Also try swapping chains, if applicable +                for (auto cb : chain_basis) { +                    Loc chain_base_loc = ctx->getBelLocation(cb->bel); +                    BelId try_base = random_bel_for_cell(cb, chain_base_loc.z); +                    if (try_base != BelId() && try_base != cb->bel) +                        try_swap_chain(cb, try_base); +                } +            } + +            if (ctx->debug) { +                // Verify correctness of incremental wirelen updates +                for (size_t i = 0; i < net_bounds.size(); i++) { +                    auto net = net_by_udata[i]; +                    if (ignore_net(net)) +                        continue; +                    auto &incr = net_bounds.at(i), gold = get_net_bounds(net); +                    NPNR_ASSERT(incr.x0 == gold.x0); +                    NPNR_ASSERT(incr.x1 == gold.x1); +                    NPNR_ASSERT(incr.y0 == gold.y0); +                    NPNR_ASSERT(incr.y1 == gold.y1); +                    NPNR_ASSERT(incr.nx0 == gold.nx0); +                    NPNR_ASSERT(incr.nx1 == gold.nx1); +                    NPNR_ASSERT(incr.ny0 == gold.ny0); +                    NPNR_ASSERT(incr.ny1 == gold.ny1); +                }              } -            if (curr_metric < min_metric) { -                min_metric = curr_metric; +            if (curr_wirelen_cost < min_wirelen) { +                min_wirelen = curr_wirelen_cost;                  improved = true;              } @@ -215,9 +320,10 @@ class SAPlacer              else                  n_no_progress++; -            if (temp <= 1e-3 && n_no_progress >= 5) { -                if (iter % 5 != 0) -                    log_info("  at iteration #%d: temp = %f, cost = %f\n", iter, temp, double(curr_metric)); +            if (temp <= 1e-7 && n_no_progress >= (refine ? 1 : 5)) { +                log_info("  at iteration #%d: temp = %f, timing cost = " +                         "%.0f, wirelen = %.0f \n", +                         iter, temp, double(curr_timing_cost), double(curr_wirelen_cost));                  break;              } @@ -225,64 +331,75 @@ class SAPlacer              int M = std::max(max_x, max_y) + 1; -            double upper = 0.6, lower = 0.4; +            if (ctx->verbose) +                log("iter #%d: temp = %f, timing cost = " +                    "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", +                    iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); -            if (curr_metric < 0.95 * avg_metric) { -                avg_metric = 0.8 * avg_metric + 0.2 * curr_metric; +            if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { +                avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost;              } else { -                if (Raccept >= 0.8) { -                    temp *= 0.7; -                } else if (Raccept > upper) { -                    if (diameter < M) -                        diameter++; -                    else -                        temp *= 0.9; -                } else if (Raccept > lower) { +                double diam_next = diameter * (1.0 - 0.44 + Raccept); +                diameter = std::max<int>(1, std::min<int>(M, int(diam_next + 0.5))); +                if (Raccept > 0.96) { +                    temp *= 0.5; +                } else if (Raccept > 0.8) { +                    temp *= 0.9; +                } else if (Raccept > 0.15 && diameter > 1) {                      temp *= 0.95;                  } else { -                    // Raccept < 0.3 -                    if (diameter > 1) -                        diameter--; -                    else -                        temp *= 0.8; +                    temp *= 0.8;                  }              }              // Once cooled below legalise threshold, run legalisation and start requiring              // legal moves only -            if (temp < legalise_temp && require_legal) { +            if (diameter < legalise_dia && require_legal) {                  if (legalise_relative_constraints(ctx)) {                      // Only increase temperature if something was moved                      autoplaced.clear(); +                    chain_basis.clear();                      for (auto cell : sorted(ctx->cells)) { -                        if (cell.second->belStrength < STRENGTH_STRONG) +                        if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->constr_parent == nullptr && +                            !cell.second->constr_children.empty()) +                            chain_basis.push_back(cell.second); +                        else if (cell.second->belStrength < STRENGTH_STRONG)                              autoplaced.push_back(cell.second);                      } -                    temp = post_legalise_temp; -                    diameter *= post_legalise_dia_scale; +                    // temp = post_legalise_temp; +                    // diameter = std::min<int>(M, diameter * post_legalise_dia_scale);                      ctx->shuffle(autoplaced);                      // Legalisation is a big change so force a slack redistribution here -                    if (ctx->slack_redist_iter > 0) +                    if (ctx->slack_redist_iter > 0 && cfg.budgetBased)                          assign_budget(ctx, true /* quiet */);                  }                  require_legal = false; -            } else if (ctx->slack_redist_iter > 0 && iter % ctx->slack_redist_iter == 0) { +            } else if (cfg.budgetBased && ctx->slack_redist_iter > 0 && iter % ctx->slack_redist_iter == 0) {                  assign_budget(ctx, true /* quiet */);              } +            // Invoke timing analysis to obtain criticalities +            if (!cfg.budgetBased && ctx->timing_driven) +                get_criticalities(ctx, &net_crit); +            // Need to rebuild costs after criticalities change +            setup_costs(); +            // Reset incremental bounds +            moveChange.reset(this); +            moveChange.new_net_bounds = net_bounds; +              // Recalculate total metric entirely to avoid rounding errors              // accumulating over time -            curr_metric = 0; -            curr_tns = 0; -            for (auto &net : ctx->nets) { -                wirelen_t wl = get_net_metric(ctx, net.second.get(), MetricType::COST, curr_tns); -                costs[net.second->udata] = CostChange{wl, -1}; -                curr_metric += wl; -            } - +            curr_wirelen_cost = total_wirelen_cost(); +            curr_timing_cost = total_timing_cost(); +            last_wirelen_cost = curr_wirelen_cost; +            last_timing_cost = curr_timing_cost;              // Let the UI show visualization updates.              ctx->yield();          } + +        auto saplace_end = std::chrono::high_resolution_clock::now(); +        log_info("SA placement time %.02fs\n", std::chrono::duration<float>(saplace_end - saplace_start).count()); +          // Final post-pacement validitiy check          ctx->yield();          for (auto bel : ctx->getBels()) { @@ -327,7 +444,8 @@ class SAPlacer                  ctx->unbindBel(cell->bel);              }              IdString targetType = cell->type; -            for (auto bel : ctx->getBels()) { + +            auto proc_bel = [&](BelId bel) {                  if (ctx->getBelType(bel) == targetType && ctx->isValidBelForCell(cell, bel)) {                      if (ctx->checkBelAvail(bel)) {                          uint64_t score = ctx->rng64(); @@ -345,7 +463,18 @@ class SAPlacer                          }                      }                  } +            }; + +            if (cell->region != nullptr && cell->region->constr_bels) { +                for (auto bel : cell->region->bels) { +                    proc_bel(bel); +                } +            } else { +                for (auto bel : ctx->getBels()) { +                    proc_bel(bel); +                }              } +              if (best_bel == BelId()) {                  if (iters == 0 || ripup_bel == BelId())                      log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx)); @@ -366,49 +495,38 @@ class SAPlacer      // Attempt a SA position swap, return true on success or false on failure      bool try_swap_position(CellInfo *cell, BelId newBel)      { -        static std::vector<NetInfo *> updates; -        updates.clear(); +        static const double epsilon = 1e-20; +        moveChange.reset(this); +        if (!require_legal && is_constrained(cell)) +            return false;          BelId oldBel = cell->bel;          CellInfo *other_cell = ctx->getBoundBelCell(newBel); -        if (other_cell != nullptr && other_cell->belStrength > STRENGTH_WEAK) { +        if (!require_legal && other_cell != nullptr && +            (is_constrained(other_cell) || other_cell->belStrength > STRENGTH_WEAK)) {              return false;          }          int old_dist = get_constraints_distance(ctx, cell);          int new_dist;          if (other_cell != nullptr)              old_dist += get_constraints_distance(ctx, other_cell); -        wirelen_t new_metric = 0, delta; +        double delta = 0;          ctx->unbindBel(oldBel);          if (other_cell != nullptr) {              ctx->unbindBel(newBel);          } -        for (const auto &port : cell->ports) { -            if (port.second.net != nullptr) { -                auto &cost = costs[port.second.net->udata]; -                if (cost.new_cost == 0) -                    continue; -                cost.new_cost = 0; -                updates.emplace_back(port.second.net); -            } -        } +        ctx->bindBel(newBel, cell, STRENGTH_WEAK);          if (other_cell != nullptr) { -            for (const auto &port : other_cell->ports) -                if (port.second.net != nullptr) { -                    auto &cost = costs[port.second.net->udata]; -                    if (cost.new_cost == 0) -                        continue; -                    cost.new_cost = 0; -                    updates.emplace_back(port.second.net); -                } +            ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK);          } -        ctx->bindBel(newBel, cell, STRENGTH_WEAK); +        add_move_cell(moveChange, cell, oldBel);          if (other_cell != nullptr) { -            ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); +            add_move_cell(moveChange, other_cell, newBel);          } +          if (!ctx->isBelLocationValid(newBel) || ((other_cell != nullptr && !ctx->isBelLocationValid(oldBel)))) {              ctx->unbindBel(newBel);              if (other_cell != nullptr) @@ -416,26 +534,18 @@ class SAPlacer              goto swap_fail;          } -        new_metric = curr_metric; -          // Recalculate metrics for all nets touched by the peturbation -        for (const auto &net : updates) { -            auto &c = costs[net->udata]; -            new_metric -= c.curr_cost; -            float temp_tns = 0; -            wirelen_t net_new_wl = get_net_metric(ctx, net, MetricType::COST, temp_tns); -            new_metric += net_new_wl; -            c.new_cost = net_new_wl; -        } +        compute_cost_changes(moveChange);          new_dist = get_constraints_distance(ctx, cell);          if (other_cell != nullptr)              new_dist += get_constraints_distance(ctx, other_cell); -        delta = new_metric - curr_metric; -        delta += (cfg.constraintWeight / temp) * (new_dist - old_dist); +        delta = lambda * (moveChange.timing_delta / std::max<double>(last_timing_cost, epsilon)) + +                (1 - lambda) * (double(moveChange.wirelen_delta) / std::max<double>(last_wirelen_cost, epsilon)); +        delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost;          n_move++;          // SA acceptance criterea -        if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { +        if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) {              n_accept++;          } else {              if (other_cell != nullptr) @@ -443,32 +553,149 @@ class SAPlacer              ctx->unbindBel(newBel);              goto swap_fail;          } -        curr_metric = new_metric; -        for (const auto &net : updates) { -            auto &c = costs[net->udata]; -            c = CostChange{c.new_cost, -1}; -        } - +        commit_cost_changes(moveChange); +#if 0 +        log_info("swap %s -> %s\n", cell->name.c_str(ctx), ctx->getBelName(newBel).c_str(ctx)); +        if (other_cell != nullptr) +            log_info("swap %s -> %s\n", other_cell->name.c_str(ctx), ctx->getBelName(oldBel).c_str(ctx)); +#endif          return true;      swap_fail:          ctx->bindBel(oldBel, cell, STRENGTH_WEAK);          if (other_cell != nullptr) {              ctx->bindBel(newBel, other_cell, STRENGTH_WEAK);          } -        for (const auto &net : updates) -            costs[net->udata].new_cost = -1; +        return false; +    } + +    inline bool is_constrained(CellInfo *cell) +    { +        return cell->constr_parent != nullptr || !cell->constr_children.empty(); +    } + +    // Swap the Bel of a cell with another, return the original location +    BelId swap_cell_bels(CellInfo *cell, BelId newBel) +    { +        BelId oldBel = cell->bel; +#if 0 +        log_info("%s old: %s new: %s\n", cell->name.c_str(ctx), ctx->getBelName(cell->bel).c_str(ctx), ctx->getBelName(newBel).c_str(ctx)); +#endif +        CellInfo *bound = ctx->getBoundBelCell(newBel); +        if (bound != nullptr) +            ctx->unbindBel(newBel); +        ctx->unbindBel(oldBel); +        ctx->bindBel(newBel, cell, is_constrained(cell) ? STRENGTH_STRONG : STRENGTH_WEAK); +        if (bound != nullptr) +            ctx->bindBel(oldBel, bound, is_constrained(bound) ? STRENGTH_STRONG : STRENGTH_WEAK); +        return oldBel; +    } + +    // Discover the relative positions of all cells in a chain +    void discover_chain(Loc baseLoc, CellInfo *cell, std::vector<std::pair<CellInfo *, Loc>> &cell_rel) +    { +        Loc cellLoc = ctx->getBelLocation(cell->bel); +        Loc rel{cellLoc.x - baseLoc.x, cellLoc.y - baseLoc.y, cellLoc.z}; +        cell_rel.emplace_back(std::make_pair(cell, rel)); +        for (auto child : cell->constr_children) +            discover_chain(baseLoc, child, cell_rel); +    } + +    // Attempt to swap a chain with a non-chain +    bool try_swap_chain(CellInfo *cell, BelId newBase) +    { +        std::vector<std::pair<CellInfo *, Loc>> cell_rel; +        std::unordered_set<IdString> cells; +        std::vector<std::pair<CellInfo *, BelId>> moves_made; +        std::vector<std::pair<CellInfo *, BelId>> dest_bels; +        double delta = 0; +        moveChange.reset(this); +        if (ctx->debug) +            log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx)); + +        Loc baseLoc = ctx->getBelLocation(cell->bel); +        discover_chain(baseLoc, cell, cell_rel); +        Loc newBaseLoc = ctx->getBelLocation(newBase); +        NPNR_ASSERT(newBaseLoc.z == baseLoc.z); +        for (const auto &cr : cell_rel) +            cells.insert(cr.first->name); + +        for (const auto &cr : cell_rel) { +            Loc targetLoc = {newBaseLoc.x + cr.second.x, newBaseLoc.y + cr.second.y, cr.second.z}; +            BelId targetBel = ctx->getBelByLocation(targetLoc); +            if (targetBel == BelId()) +                return false; +            if (ctx->getBelType(targetBel) != cell->type) +                return false; +            CellInfo *bound = ctx->getBoundBelCell(targetBel); +            // We don't consider swapping chains with other chains, at least for the time being - unless it is +            // part of this chain +            if (bound != nullptr && !cells.count(bound->name) && +                (bound->belStrength >= STRENGTH_STRONG || is_constrained(bound))) +                return false; +            dest_bels.emplace_back(std::make_pair(cr.first, targetBel)); +        } +        if (ctx->debug) +            log_info("trying chain swap %s\n", cell->name.c_str(ctx)); +        // <cell, oldBel> +        for (const auto &db : dest_bels) { +            BelId oldBel = swap_cell_bels(db.first, db.second); +            moves_made.emplace_back(std::make_pair(db.first, oldBel)); +            CellInfo *bound = ctx->getBoundBelCell(oldBel); +            add_move_cell(moveChange, db.first, oldBel); +            if (bound != nullptr) +                add_move_cell(moveChange, bound, db.second); +        } +        for (const auto &mm : moves_made) { +            if (!ctx->isBelLocationValid(mm.first->bel) || !check_cell_bel_region(mm.first, mm.first->bel)) +                goto swap_fail; +            if (!ctx->isBelLocationValid(mm.second)) +                goto swap_fail; +            CellInfo *bound = ctx->getBoundBelCell(mm.second); +            if (bound && !check_cell_bel_region(bound, bound->bel)) +                goto swap_fail; +        } +        compute_cost_changes(moveChange); +        delta = lambda * (moveChange.timing_delta / last_timing_cost) + +                (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); +        n_move++; +        // SA acceptance criterea +        if (delta < 0 || (temp > 1e-9 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { +            n_accept++; +            if (ctx->debug) +                log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); +        } else { +            goto swap_fail; +        } +        commit_cost_changes(moveChange); +        return true; +    swap_fail: +        for (const auto &entry : boost::adaptors::reverse(moves_made)) +            swap_cell_bels(entry.first, entry.second);          return false;      }      // Find a random Bel of the correct type for a cell, within the specified      // diameter -    BelId random_bel_for_cell(CellInfo *cell) +    BelId random_bel_for_cell(CellInfo *cell, int force_z = -1)      {          IdString targetType = cell->type;          Loc curr_loc = ctx->getBelLocation(cell->bel); +        int count = 0; + +        int dx = diameter, dy = diameter; +        if (cell->region != nullptr && cell->region->constr_bels) { +            dx = std::min(diameter, (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1); +            dy = std::min(diameter, (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1); +            // Clamp location to within bounds +            curr_loc.x = std::max(region_bounds[cell->region->name].x0, curr_loc.x); +            curr_loc.x = std::min(region_bounds[cell->region->name].x1, curr_loc.x); +            curr_loc.y = std::max(region_bounds[cell->region->name].y0, curr_loc.y); +            curr_loc.y = std::min(region_bounds[cell->region->name].y1, curr_loc.y); +        } +          while (true) { -            int nx = ctx->rng(2 * diameter + 1) + std::max(curr_loc.x - diameter, 0); -            int ny = ctx->rng(2 * diameter + 1) + std::max(curr_loc.y - diameter, 0); +            int nx = ctx->rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0); +            int ny = ctx->rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0);              int beltype_idx, beltype_cnt;              std::tie(beltype_idx, beltype_cnt) = bel_types.at(targetType);              if (beltype_cnt < cfg.minBelsForGridPick) @@ -481,41 +708,436 @@ class SAPlacer              if (fb.size() == 0)                  continue;              BelId bel = fb.at(ctx->rng(int(fb.size()))); +            if (force_z != -1) { +                Loc loc = ctx->getBelLocation(bel); +                if (loc.z != force_z) +                    continue; +            } +            if (!check_cell_bel_region(cell, bel)) +                continue;              if (locked_bels.find(bel) != locked_bels.end())                  continue; +            count++;              return bel;          }      } +    // Return true if a net is to be entirely ignored +    inline bool ignore_net(NetInfo *net) +    { +        return net->driver.cell == nullptr || net->driver.cell->bel == BelId() || +               ctx->getBelGlobalBuf(net->driver.cell->bel); +    } + +    // Get the bounding box for a net +    inline BoundingBox get_net_bounds(NetInfo *net) +    { +        BoundingBox bb; +        NPNR_ASSERT(net->driver.cell != nullptr); +        Loc dloc = ctx->getBelLocation(net->driver.cell->bel); +        bb.x0 = dloc.x; +        bb.x1 = dloc.x; +        bb.y0 = dloc.y; +        bb.y1 = dloc.y; +        bb.nx0 = 1; +        bb.nx1 = 1; +        bb.ny0 = 1; +        bb.ny1 = 1; +        for (auto user : net->users) { +            if (user.cell->bel == BelId()) +                continue; +            Loc uloc = ctx->getBelLocation(user.cell->bel); +            if (bb.x0 == uloc.x) +                ++bb.nx0; +            else if (uloc.x < bb.x0) { +                bb.x0 = uloc.x; +                bb.nx0 = 1; +            } +            if (bb.x1 == uloc.x) +                ++bb.nx1; +            else if (uloc.x > bb.x1) { +                bb.x1 = uloc.x; +                bb.nx1 = 1; +            } +            if (bb.y0 == uloc.y) +                ++bb.ny0; +            else if (uloc.y < bb.y0) { +                bb.y0 = uloc.y; +                bb.ny0 = 1; +            } +            if (bb.y1 == uloc.y) +                ++bb.ny1; +            else if (uloc.y > bb.y1) { +                bb.y1 = uloc.y; +                bb.ny1 = 1; +            } +        } + +        return bb; +    } + +    // Get the timing cost for an arc of a net +    inline double get_timing_cost(NetInfo *net, size_t user) +    { +        int cc; +        if (net->driver.cell == nullptr) +            return 0; +        if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE) +            return 0; +        if (cfg.budgetBased) { +            double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); +            return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10)); +        } else { +            auto crit = net_crit.find(net->name); +            if (crit == net_crit.end() || crit->second.criticality.empty()) +                return 0; +            double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); +            return delay * std::pow(crit->second.criticality.at(user), crit_exp); +        } +    } + +    // Set up the cost maps +    void setup_costs() +    { +        for (auto net : sorted(ctx->nets)) { +            NetInfo *ni = net.second; +            if (ignore_net(ni)) +                continue; +            net_bounds[ni->udata] = get_net_bounds(ni); +            if (ctx->timing_driven && int(ni->users.size()) < cfg.timingFanoutThresh) +                for (size_t i = 0; i < ni->users.size(); i++) +                    net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); +        } +    } + +    // Get the total wiring cost for the design +    wirelen_t total_wirelen_cost() +    { +        wirelen_t cost = 0; +        for (const auto &net : net_bounds) +            cost += net.hpwl(); +        return cost; +    } + +    // Get the total timing cost for the design +    double total_timing_cost() +    { +        double cost = 0; +        for (const auto &net : net_arc_tcost) { +            for (auto arc_cost : net) { +                cost += arc_cost; +            } +        } +        return cost; +    } + +    // Cost-change-related data for a move +    struct MoveChangeData +    { + +        enum BoundChangeType +        { +            NO_CHANGE, +            CELL_MOVED_INWARDS, +            CELL_MOVED_OUTWARDS, +            FULL_RECOMPUTE +        }; + +        std::vector<decltype(NetInfo::udata)> bounds_changed_nets_x, bounds_changed_nets_y; +        std::vector<std::pair<decltype(NetInfo::udata), size_t>> changed_arcs; + +        std::vector<BoundChangeType> already_bounds_changed_x, already_bounds_changed_y; +        std::vector<std::vector<bool>> already_changed_arcs; + +        std::vector<BoundingBox> new_net_bounds; +        std::vector<std::pair<std::pair<decltype(NetInfo::udata), size_t>, double>> new_arc_costs; + +        wirelen_t wirelen_delta = 0; +        double timing_delta = 0; + +        void init(SAPlacer *p) +        { +            already_bounds_changed_x.resize(p->ctx->nets.size()); +            already_bounds_changed_y.resize(p->ctx->nets.size()); +            already_changed_arcs.resize(p->ctx->nets.size()); +            for (auto &net : p->ctx->nets) { +                already_changed_arcs.at(net.second->udata).resize(net.second->users.size()); +            } +            new_net_bounds = p->net_bounds; +        } + +        void reset(SAPlacer *p) +        { +            for (auto bc : bounds_changed_nets_x) { +                new_net_bounds[bc] = p->net_bounds[bc]; +                already_bounds_changed_x[bc] = NO_CHANGE; +            } +            for (auto bc : bounds_changed_nets_y) { +                new_net_bounds[bc] = p->net_bounds[bc]; +                already_bounds_changed_y[bc] = NO_CHANGE; +            } +            for (const auto &tc : changed_arcs) +                already_changed_arcs[tc.first][tc.second] = false; +            bounds_changed_nets_x.clear(); +            bounds_changed_nets_y.clear(); +            changed_arcs.clear(); +            new_arc_costs.clear(); +            wirelen_delta = 0; +            timing_delta = 0; +        } + +    } moveChange; + +    void add_move_cell(MoveChangeData &mc, CellInfo *cell, BelId old_bel) +    { +        Loc curr_loc = ctx->getBelLocation(cell->bel); +        Loc old_loc = ctx->getBelLocation(old_bel); +        // Check net bounds +        for (const auto &port : cell->ports) { +            NetInfo *pn = port.second.net; +            if (pn == nullptr) +                continue; +            if (ignore_net(pn)) +                continue; +            BoundingBox &curr_bounds = mc.new_net_bounds[pn->udata]; +            // Incremental bounding box updates +            // Note that everything other than full updates are applied immediately rather than being queued, +            // so further updates to the same net in the same move are dealt with correctly. +            // If a full update is already queued, this can be considered a no-op +            if (mc.already_bounds_changed_x[pn->udata] != MoveChangeData::FULL_RECOMPUTE) { +                // Bounds x0 +                if (curr_loc.x < curr_bounds.x0) { +                    // Further out than current bounds x0 +                    curr_bounds.x0 = curr_loc.x; +                    curr_bounds.nx0 = 1; +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { +                        // Checking already_bounds_changed_x ensures that each net is only added once +                        // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the +                        // overall cost change +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    } +                } else if (curr_loc.x == curr_bounds.x0 && old_loc.x > curr_bounds.x0) { +                    curr_bounds.nx0++; +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    } +                } else if (old_loc.x == curr_bounds.x0 && curr_loc.x > curr_bounds.x0) { +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    if (curr_bounds.nx0 == 1) { +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE; +                    } else { +                        curr_bounds.nx0--; +                        if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) +                            mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; +                    } +                } + +                // Bounds x1 +                if (curr_loc.x > curr_bounds.x1) { +                    // Further out than current bounds x1 +                    curr_bounds.x1 = curr_loc.x; +                    curr_bounds.nx1 = 1; +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { +                        // Checking already_bounds_changed_x ensures that each net is only added once +                        // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the +                        // overall cost change +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    } +                } else if (curr_loc.x == curr_bounds.x1 && old_loc.x < curr_bounds.x1) { +                    curr_bounds.nx1++; +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    } +                } else if (old_loc.x == curr_bounds.x1 && curr_loc.x < curr_bounds.x1) { +                    if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) +                        mc.bounds_changed_nets_x.push_back(pn->udata); +                    if (curr_bounds.nx1 == 1) { +                        mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE; +                    } else { +                        curr_bounds.nx1--; +                        if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) +                            mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; +                    } +                } +            } +            if (mc.already_bounds_changed_y[pn->udata] != MoveChangeData::FULL_RECOMPUTE) { +                // Bounds y0 +                if (curr_loc.y < curr_bounds.y0) { +                    // Further out than current bounds y0 +                    curr_bounds.y0 = curr_loc.y; +                    curr_bounds.ny0 = 1; +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    } +                } else if (curr_loc.y == curr_bounds.y0 && old_loc.y > curr_bounds.y0) { +                    curr_bounds.ny0++; +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    } +                } else if (old_loc.y == curr_bounds.y0 && curr_loc.y > curr_bounds.y0) { +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    if (curr_bounds.ny0 == 1) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE; +                    } else { +                        curr_bounds.ny0--; +                        if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) +                            mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; +                    } +                } + +                // Bounds y1 +                if (curr_loc.y > curr_bounds.y1) { +                    // Further out than current bounds y1 +                    curr_bounds.y1 = curr_loc.y; +                    curr_bounds.ny1 = 1; +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    } +                } else if (curr_loc.y == curr_bounds.y1 && old_loc.y < curr_bounds.y1) { +                    curr_bounds.ny1++; +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    } +                } else if (old_loc.y == curr_bounds.y1 && curr_loc.y < curr_bounds.y1) { +                    if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) +                        mc.bounds_changed_nets_y.push_back(pn->udata); +                    if (curr_bounds.ny1 == 1) { +                        mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE; +                    } else { +                        curr_bounds.ny1--; +                        if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) +                            mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; +                    } +                } +            } + +            if (ctx->timing_driven && int(pn->users.size()) < cfg.timingFanoutThresh) { +                // Output ports - all arcs change timing +                if (port.second.type == PORT_OUT) { +                    int cc; +                    TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); +                    if (cls != TMG_IGNORE) +                        for (size_t i = 0; i < pn->users.size(); i++) +                            if (!mc.already_changed_arcs[pn->udata][i]) { +                                mc.changed_arcs.emplace_back(std::make_pair(pn->udata, i)); +                                mc.already_changed_arcs[pn->udata][i] = true; +                            } +                } else if (port.second.type == PORT_IN) { +                    auto usr = fast_port_to_user.at(&port.second); +                    if (!mc.already_changed_arcs[pn->udata][usr]) { +                        mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr)); +                        mc.already_changed_arcs[pn->udata][usr] = true; +                    } +                } +            } +        } +    } + +    void compute_cost_changes(MoveChangeData &md) +    { +        for (const auto &bc : md.bounds_changed_nets_x) { +            if (md.already_bounds_changed_x[bc] == MoveChangeData::FULL_RECOMPUTE) +                md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]); +        } +        for (const auto &bc : md.bounds_changed_nets_y) { +            if (md.already_bounds_changed_x[bc] != MoveChangeData::FULL_RECOMPUTE && +                md.already_bounds_changed_y[bc] == MoveChangeData::FULL_RECOMPUTE) +                md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]); +        } + +        for (const auto &bc : md.bounds_changed_nets_x) +            md.wirelen_delta += md.new_net_bounds[bc].hpwl() - net_bounds[bc].hpwl(); +        for (const auto &bc : md.bounds_changed_nets_y) +            if (md.already_bounds_changed_x[bc] == MoveChangeData::NO_CHANGE) +                md.wirelen_delta += md.new_net_bounds[bc].hpwl() - net_bounds[bc].hpwl(); + +        if (ctx->timing_driven) { +            for (const auto &tc : md.changed_arcs) { +                double old_cost = net_arc_tcost.at(tc.first).at(tc.second); +                double new_cost = get_timing_cost(net_by_udata.at(tc.first), tc.second); +                md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); +                md.timing_delta += (new_cost - old_cost); +                md.already_changed_arcs[tc.first][tc.second] = false; +            } +        } +    } + +    void commit_cost_changes(MoveChangeData &md) +    { +        for (const auto &bc : md.bounds_changed_nets_x) +            net_bounds[bc] = md.new_net_bounds[bc]; +        for (const auto &bc : md.bounds_changed_nets_y) +            net_bounds[bc] = md.new_net_bounds[bc]; +        for (const auto &tc : md.new_arc_costs) +            net_arc_tcost[tc.first.first].at(tc.first.second) = tc.second; +        curr_wirelen_cost += md.wirelen_delta; +        curr_timing_cost += md.timing_delta; +    } +    // Build the cell port -> user index +    void build_port_index() +    { +        for (auto net : sorted(ctx->nets)) { +            NetInfo *ni = net.second; +            for (size_t i = 0; i < ni->users.size(); i++) { +                auto &usr = ni->users.at(i); +                fast_port_to_user[&(usr.cell->ports.at(usr.port))] = i; +            } +        } +    } + +    // Get the combined wirelen/timing metric +    inline double curr_metric() { return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost; } + +    // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds +    std::vector<BoundingBox> net_bounds; +    // Map net arcs to their timing cost (criticality * delay ns) +    std::vector<std::vector<double>> net_arc_tcost; + +    // Fast lookup for cell port to net user index +    std::unordered_map<const PortInfo *, size_t> fast_port_to_user; + +    // Wirelength and timing cost at last and current iteration +    wirelen_t last_wirelen_cost, curr_wirelen_cost; +    double last_timing_cost, curr_timing_cost; + +    // Criticality data from timing analysis +    NetCriticalityMap net_crit; +      Context *ctx; -    wirelen_t curr_metric = std::numeric_limits<wirelen_t>::max(); -    float curr_tns = 0; -    float temp = 1000; +    float temp = 10; +    float crit_exp = 8; +    float lambda = 0.5;      bool improved = false;      int n_move, n_accept;      int diameter = 35, max_x = 1, max_y = 1;      std::unordered_map<IdString, std::tuple<int, int>> bel_types; +    std::unordered_map<IdString, BoundingBox> region_bounds;      std::vector<std::vector<std::vector<std::vector<BelId>>>> fast_bels;      std::unordered_set<BelId> locked_bels; +    std::vector<NetInfo *> net_by_udata; +    std::vector<decltype(NetInfo::udata)> old_udata;      bool require_legal = true; -    const float legalise_temp = 1; -    const float post_legalise_temp = 10; -    const float post_legalise_dia_scale = 1.5; +    const int legalise_dia = 4;      Placer1Cfg cfg; - -    struct CostChange -    { -        wirelen_t curr_cost; -        wirelen_t new_cost; -    }; -    std::vector<CostChange> costs; -    std::vector<decltype(NetInfo::udata)> old_udata;  };  Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx)  {      constraintWeight = get<float>("placer1/constraintWeight", 10);      minBelsForGridPick = get<int>("placer1/minBelsForGridPick", 64); +    budgetBased = get<bool>("placer1/budgetBased", false); +    startTemp = get<float>("placer1/startTemp", 1); +    timingFanoutThresh = std::numeric_limits<int>::max();  }  bool placer1(Context *ctx, Placer1Cfg cfg) @@ -538,4 +1160,24 @@ bool placer1(Context *ctx, Placer1Cfg cfg)      }  } +bool placer1_refine(Context *ctx, Placer1Cfg cfg) +{ +    try { +        SAPlacer placer(ctx, cfg); +        placer.place(true); +        log_info("Checksum: 0x%08x\n", ctx->checksum()); +#ifndef NDEBUG +        ctx->lock(); +        ctx->check(); +        ctx->unlock(); +#endif +        return true; +    } catch (log_execution_error_exception) { +#ifndef NDEBUG +        ctx->check(); +#endif +        return false; +    } +} +  NEXTPNR_NAMESPACE_END diff --git a/common/placer1.h b/common/placer1.h index 7305f4b1..4c7c7339 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -29,9 +29,13 @@ struct Placer1Cfg : public Settings      Placer1Cfg(Context *ctx);      float constraintWeight;      int minBelsForGridPick; +    bool budgetBased; +    float startTemp; +    int timingFanoutThresh;  };  extern bool placer1(Context *ctx, Placer1Cfg cfg); +extern bool placer1_refine(Context *ctx, Placer1Cfg cfg);  NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.cc b/common/placer_heap.cc new file mode 100644 index 00000000..f9b639f8 --- /dev/null +++ b/common/placer_heap.cc @@ -0,0 +1,1545 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2019  David Shah <david@symbioticeda.com> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + *  [[cite]] HeAP + *  Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + *  https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + *  [[cite]] SimPL + *  SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + *  http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + * + *  Notable changes from the original algorithm + *   - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is + *     added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation) + *     as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by + *     spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices. + *   - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) + */ + +#ifdef WITH_HEAP + +#include "placer_heap.h" +#include <Eigen/Core> +#include <Eigen/IterativeLinearSolvers> +#include <boost/optional.hpp> +#include <chrono> +#include <deque> +#include <fstream> +#include <numeric> +#include <queue> +#include <thread> +#include <tuple> +#include <unordered_map> +#include "log.h" +#include "nextpnr.h" +#include "place_common.h" +#include "placer1.h" +#include "timing.h" +#include "util.h" +NEXTPNR_NAMESPACE_BEGIN + +namespace { +// A simple internal representation for a sparse system of equations Ax = rhs +// This is designed to decouple the functions that build the matrix to the engine that +// solves it, and the representation that requires +template <typename T> struct EquationSystem +{ + +    EquationSystem(size_t rows, size_t cols) +    { +        A.resize(cols); +        rhs.resize(rows); +    } + +    // Simple sparse format, easy to convert to CCS for solver +    std::vector<std::vector<std::pair<int, T>>> A; // col -> (row, x[row, col]) sorted by row +    std::vector<T> rhs;                            // RHS vector +    void reset() +    { +        for (auto &col : A) +            col.clear(); +        std::fill(rhs.begin(), rhs.end(), T()); +    } + +    void add_coeff(int row, int col, T val) +    { +        auto &Ac = A.at(col); +        // Binary search +        int b = 0, e = int(Ac.size()) - 1; +        while (b <= e) { +            int i = (b + e) / 2; +            if (Ac.at(i).first == row) { +                Ac.at(i).second += val; +                return; +            } +            if (Ac.at(i).first > row) +                e = i - 1; +            else +                b = i + 1; +        } +        Ac.insert(Ac.begin() + b, std::make_pair(row, val)); +    } + +    void add_rhs(int row, T val) { rhs[row] += val; } + +    void solve(std::vector<T> &x) +    { +        using namespace Eigen; +        if (x.empty()) +            return; +        NPNR_ASSERT(x.size() == A.size()); + +        VectorXd vx(x.size()), vb(rhs.size()); +        SparseMatrix<T> mat(A.size(), A.size()); + +        std::vector<int> colnnz; +        for (auto &Ac : A) +            colnnz.push_back(int(Ac.size())); +        mat.reserve(colnnz); +        for (int col = 0; col < int(A.size()); col++) { +            auto &Ac = A.at(col); +            for (auto &el : Ac) +                mat.insert(el.first, col) = el.second; +        } + +        for (int i = 0; i < int(x.size()); i++) +            vx[i] = x.at(i); +        for (int i = 0; i < int(rhs.size()); i++) +            vb[i] = rhs.at(i); + +        ConjugateGradient<SparseMatrix<T>, Lower | Upper> solver; +        solver.setTolerance(1e-5); +        VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx); +        for (int i = 0; i < int(x.size()); i++) +            x.at(i) = xr[i]; +        // for (int i = 0; i < int(x.size()); i++) +        //    log_info("x[%d] = %f\n", i, x.at(i)); +    } +}; + +} // namespace + +class HeAPPlacer +{ +  public: +    HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg) { Eigen::initParallel(); } + +    bool place() +    { +        auto startt = std::chrono::high_resolution_clock::now(); + +        ctx->lock(); +        place_constraints(); +        build_fast_bels(); +        seed_placement(); +        update_all_chains(); +        wirelen_t hpwl = total_hpwl(); +        log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n", +                 int(place_cells.size()), int(hpwl)); +        for (int i = 0; i < 4; i++) { +            setup_solve_cells(); +            auto solve_startt = std::chrono::high_resolution_clock::now(); +            std::thread xaxis([&]() { build_solve_direction(false, -1); }); +            build_solve_direction(true, -1); +            xaxis.join(); +            auto solve_endt = std::chrono::high_resolution_clock::now(); +            solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count(); + +            update_all_chains(); + +            hpwl = total_hpwl(); +            log_info("    at initial placer iter %d, wirelen = %d\n", i, int(hpwl)); +        } + +        wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits<wirelen_t>::max(); +        int iter = 0, stalled = 0; + +        std::vector<std::tuple<CellInfo *, BelId, PlaceStrength>> solution; + +        std::vector<std::unordered_set<IdString>> heap_runs; +        std::unordered_set<IdString> all_celltypes; +        std::unordered_map<IdString, int> ct_count; + +        for (auto cell : place_cells) { +            if (!all_celltypes.count(cell->type)) { +                heap_runs.push_back(std::unordered_set<IdString>{cell->type}); +                all_celltypes.insert(cell->type); +            } +            ct_count[cell->type]++; +        } +        // If more than 98% of cells are one cell type, always solve all at once +        // Otherwise, follow full HeAP strategy of rotate&all +        for (auto &c : ct_count) +            if (c.second >= 0.98 * int(place_cells.size())) { +                heap_runs.clear(); +                break; +            } + +        heap_runs.push_back(all_celltypes); +        // The main HeAP placer loop +        log_info("Running main analytical placer.\n"); +        while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) { +            // Alternate between particular Bel types and all bels +            for (auto &run : heap_runs) { +                auto run_startt = std::chrono::high_resolution_clock::now(); + +                setup_solve_cells(&run); +                if (solve_cells.empty()) +                    continue; +                // Heuristic: don't bother with threading below a certain size +                auto solve_startt = std::chrono::high_resolution_clock::now(); + +                if (solve_cells.size() < 500) { +                    build_solve_direction(false, (iter == 0) ? -1 : iter); +                    build_solve_direction(true, (iter == 0) ? -1 : iter); +                } else { +                    std::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); }); +                    build_solve_direction(true, (iter == 0) ? -1 : iter); +                    xaxis.join(); +                } +                auto solve_endt = std::chrono::high_resolution_clock::now(); +                solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count(); +                update_all_chains(); +                solved_hpwl = total_hpwl(); + +                update_all_chains(); +                for (auto type : sorted(run)) +                    CutSpreader(this, type).run(); + +                update_all_chains(); +                spread_hpwl = total_hpwl(); +                legalise_placement_strict(true); +                update_all_chains(); + +                legal_hpwl = total_hpwl(); +                auto run_stopt = std::chrono::high_resolution_clock::now(); +                log_info("    at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n", +                         iter + 1, (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), int(solved_hpwl), +                         int(spread_hpwl), int(legal_hpwl), +                         std::chrono::duration<double>(run_stopt - run_startt).count()); +            } + +            if (ctx->timing_driven) +                get_criticalities(ctx, &net_crit); + +            if (legal_hpwl < best_hpwl) { +                best_hpwl = legal_hpwl; +                stalled = 0; +                // Save solution +                solution.clear(); +                for (auto cell : sorted(ctx->cells)) { +                    solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength); +                } +            } else { +                ++stalled; +            } +            for (auto &cl : cell_locs) { +                cl.second.legal_x = cl.second.x; +                cl.second.legal_y = cl.second.y; +            } +            ctx->yield(); +            ++iter; +        } + +        // Apply saved solution +        for (auto &sc : solution) { +            CellInfo *cell = std::get<0>(sc); +            if (cell->bel != BelId()) +                ctx->unbindBel(cell->bel); +        } +        for (auto &sc : solution) { +            CellInfo *cell; +            BelId bel; +            PlaceStrength strength; +            std::tie(cell, bel, strength) = sc; +            ctx->bindBel(bel, cell, strength); +        } + +        for (auto cell : sorted(ctx->cells)) { +            if (cell.second->bel == BelId()) +                log_error("Found unbound cell %s\n", cell.first.c_str(ctx)); +            if (ctx->getBoundBelCell(cell.second->bel) != cell.second) +                log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx)); +            if (ctx->debug) +                log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->getBelName(cell.second->bel).c_str(ctx)); +        } + +        ctx->unlock(); +        auto endtt = std::chrono::high_resolution_clock::now(); +        log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration<double>(endtt - startt).count()); +        log_info("  of which solving equations: %.02fs\n", solve_time); +        log_info("  of which spreading cells: %.02fs\n", cl_time); +        log_info("  of which strict legalisation: %.02fs\n", sl_time); + +        ctx->check(); + +        placer1_refine(ctx, Placer1Cfg(ctx)); + +        return true; +    } + +  private: +    Context *ctx; +    PlacerHeapCfg cfg; + +    int max_x = 0, max_y = 0; +    std::vector<std::vector<std::vector<std::vector<BelId>>>> fast_bels; +    std::unordered_map<IdString, std::tuple<int, int>> bel_types; + +    // For fast handling of heterogeneosity during initial placement without full legalisation, +    // for each Bel type this goes from x or y to the nearest x or y where a Bel of a given type exists +    // This is particularly important for the iCE40 architecture, where multipliers and BRAM only exist at the +    // edges and corners respectively +    std::vector<std::vector<int>> nearest_row_with_bel; +    std::vector<std::vector<int>> nearest_col_with_bel; + +    // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom +    // structure instead +    struct CellLocation +    { +        int x, y; +        int legal_x, legal_y; +        double rawx, rawy; +        bool locked, global; +    }; +    std::unordered_map<IdString, CellLocation> cell_locs; +    // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains +    // (only the root of each macro is placed.) +    std::vector<CellInfo *> place_cells; + +    // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place +    // cells of a certain type) +    std::vector<CellInfo *> solve_cells; + +    // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent +    // where chains are within chains +    std::unordered_map<IdString, CellInfo *> chain_root; +    std::unordered_map<IdString, int> chain_size; + +    // The offset from chain_root to a cell in the chain +    std::unordered_map<IdString, std::pair<int, int>> cell_offsets; + +    // Performance counting +    double solve_time = 0, cl_time = 0, sl_time = 0; + +    NetCriticalityMap net_crit; + +    // Place cells with the BEL attribute set to constrain them +    void place_constraints() +    { +        size_t placed_cells = 0; +        // Initial constraints placer +        for (auto &cell_entry : ctx->cells) { +            CellInfo *cell = cell_entry.second.get(); +            auto loc = cell->attrs.find(ctx->id("BEL")); +            if (loc != cell->attrs.end()) { +                std::string loc_name = loc->second; +                BelId bel = ctx->getBelByName(ctx->id(loc_name)); +                if (bel == BelId()) { +                    log_error("No Bel named \'%s\' located for " +                              "this chip (processing BEL attribute on \'%s\')\n", +                              loc_name.c_str(), cell->name.c_str(ctx)); +                } + +                IdString bel_type = ctx->getBelType(bel); +                if (bel_type != cell->type) { +                    log_error("Bel \'%s\' of type \'%s\' does not match cell " +                              "\'%s\' of type \'%s\'\n", +                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); +                } +                if (!ctx->isValidBelForCell(cell, bel)) { +                    log_error("Bel \'%s\' of type \'%s\' is not valid for cell " +                              "\'%s\' of type \'%s\'\n", +                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); +                } + +                auto bound_cell = ctx->getBoundBelCell(bel); +                if (bound_cell) { +                    log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", +                              cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); +                } + +                ctx->bindBel(bel, cell, STRENGTH_USER); +                placed_cells++; +            } +        } +        log_info("Placed %d cells based on constraints.\n", int(placed_cells)); +        ctx->yield(); +    } + +    // Construct the fast_bels, nearest_row_with_bel and nearest_col_with_bel +    void build_fast_bels() +    { + +        int num_bel_types = 0; +        for (auto bel : ctx->getBels()) { +            IdString type = ctx->getBelType(bel); +            if (bel_types.find(type) == bel_types.end()) { +                bel_types[type] = std::tuple<int, int>(num_bel_types++, 1); +            } else { +                std::get<1>(bel_types.at(type))++; +            } +        } +        for (auto bel : ctx->getBels()) { +            if (!ctx->checkBelAvail(bel)) +                continue; +            Loc loc = ctx->getBelLocation(bel); +            IdString type = ctx->getBelType(bel); +            int type_idx = std::get<0>(bel_types.at(type)); +            if (int(fast_bels.size()) < type_idx + 1) +                fast_bels.resize(type_idx + 1); +            if (int(fast_bels.at(type_idx).size()) < (loc.x + 1)) +                fast_bels.at(type_idx).resize(loc.x + 1); +            if (int(fast_bels.at(type_idx).at(loc.x).size()) < (loc.y + 1)) +                fast_bels.at(type_idx).at(loc.x).resize(loc.y + 1); +            max_x = std::max(max_x, loc.x); +            max_y = std::max(max_y, loc.y); +            fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel); +        } + +        nearest_row_with_bel.resize(num_bel_types, std::vector<int>(max_y + 1, -1)); +        nearest_col_with_bel.resize(num_bel_types, std::vector<int>(max_x + 1, -1)); +        for (auto bel : ctx->getBels()) { +            if (!ctx->checkBelAvail(bel)) +                continue; +            Loc loc = ctx->getBelLocation(bel); +            int type_idx = std::get<0>(bel_types.at(ctx->getBelType(bel))); +            auto &nr = nearest_row_with_bel.at(type_idx), &nc = nearest_col_with_bel.at(type_idx); +            // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once +            // another row/col is already recorded as being nearer +            for (int x = loc.x; x <= max_x; x++) { +                if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (x - loc.x)) +                    break; +                nc.at(x) = loc.x; +            } +            for (int x = loc.x - 1; x >= 0; x--) { +                if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (loc.x - x)) +                    break; +                nc.at(x) = loc.x; +            } +            for (int y = loc.y; y <= max_y; y++) { +                if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (y - loc.y)) +                    break; +                nr.at(y) = loc.y; +            } +            for (int y = loc.y - 1; y >= 0; y--) { +                if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (loc.y - y)) +                    break; +                nr.at(y) = loc.y; +            } +        } +    } + +    // Build and solve in one direction +    void build_solve_direction(bool yaxis, int iter) +    { +        for (int i = 0; i < 5; i++) { +            EquationSystem<double> esx(solve_cells.size(), solve_cells.size()); +            build_equations(esx, yaxis, iter); +            solve_equations(esx, yaxis); +        } +    } + +    // Check if a cell has any meaningful connectivity +    bool has_connectivity(CellInfo *cell) +    { +        for (auto port : cell->ports) { +            if (port.second.net != nullptr && port.second.net->driver.cell != nullptr && +                !port.second.net->users.empty()) +                return true; +        } +        return false; +    } + +    // Build up a random initial placement, without regard to legality +    // FIXME: Are there better approaches to the initial placement (e.g. greedy?) +    void seed_placement() +    { +        std::unordered_map<IdString, std::deque<BelId>> available_bels; +        for (auto bel : ctx->getBels()) { +            if (!ctx->checkBelAvail(bel)) +                continue; +            available_bels[ctx->getBelType(bel)].push_back(bel); +        } +        for (auto &t : available_bels) { +            std::random_shuffle(t.second.begin(), t.second.end(), [&](size_t n) { return ctx->rng(int(n)); }); +        } +        for (auto cell : sorted(ctx->cells)) { +            CellInfo *ci = cell.second; +            if (ci->bel != BelId()) { +                Loc loc = ctx->getBelLocation(ci->bel); +                cell_locs[cell.first].x = loc.x; +                cell_locs[cell.first].y = loc.y; +                cell_locs[cell.first].locked = true; +                cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); +            } else if (ci->constr_parent == nullptr) { +                bool placed = false; +                while (!placed) { +                    if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty()) +                        log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx), +                                  ci->type.c_str(ctx)); +                    BelId bel = available_bels.at(ci->type).back(); +                    available_bels.at(ci->type).pop_back(); +                    Loc loc = ctx->getBelLocation(bel); +                    cell_locs[cell.first].x = loc.x; +                    cell_locs[cell.first].y = loc.y; +                    cell_locs[cell.first].locked = false; +                    cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); +                    // FIXME +                    if (has_connectivity(cell.second) && !cfg.ioBufTypes.count(ci->type)) { +                        place_cells.push_back(ci); +                        placed = true; +                    } else { +                        if (ctx->isValidBelForCell(ci, bel)) { +                            ctx->bindBel(bel, ci, STRENGTH_STRONG); +                            cell_locs[cell.first].locked = true; +                            placed = true; +                        } else { +                            available_bels.at(ci->type).push_front(bel); +                        } +                    } +                } +            } +        } +    } + +    // Setup the cells to be solved, returns the number of rows +    int setup_solve_cells(std::unordered_set<IdString> *celltypes = nullptr) +    { +        int row = 0; +        solve_cells.clear(); +        // First clear the udata of all cells +        for (auto cell : sorted(ctx->cells)) +            cell.second->udata = dont_solve; +        // Then update cells to be placed, which excludes cell children +        for (auto cell : place_cells) { +            if (celltypes && !celltypes->count(cell->type)) +                continue; +            cell->udata = row++; +            solve_cells.push_back(cell); +        } +        // Finally, update the udata of children +        for (auto chained : chain_root) +            ctx->cells.at(chained.first)->udata = chained.second->udata; +        return row; +    } + +    // Update the location of all children of a chain +    void update_chain(CellInfo *cell, CellInfo *root) +    { +        const auto &base = cell_locs[cell->name]; +        for (auto child : cell->constr_children) { +            chain_size[root->name]++; +            if (child->constr_x != child->UNCONSTR) +                cell_locs[child->name].x = std::min(max_x, base.x + child->constr_x); +            else +                cell_locs[child->name].x = base.x; // better handling of UNCONSTR? +            if (child->constr_y != child->UNCONSTR) +                cell_locs[child->name].y = std::min(max_y, base.y + child->constr_y); +            else +                cell_locs[child->name].y = base.y; // better handling of UNCONSTR? +            chain_root[child->name] = root; +            if (!child->constr_children.empty()) +                update_chain(child, root); +        } +    } + +    // Update all chains +    void update_all_chains() +    { +        for (auto cell : place_cells) { +            chain_size[cell->name] = 1; +            if (!cell->constr_children.empty()) +                update_chain(cell, cell); +        } +    } + +    // Run a function on all ports of a net - including the driver and all users +    template <typename Tf> void foreach_port(NetInfo *net, Tf func) +    { +        if (net->driver.cell != nullptr) +            func(net->driver, -1); +        for (size_t i = 0; i < net->users.size(); i++) +            func(net->users.at(i), i); +    } + +    // Build the system of equations for either X or Y +    void build_equations(EquationSystem<double> &es, bool yaxis, int iter = -1) +    { +        // Return the x or y position of a cell, depending on ydir +        auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; +        auto legal_pos = [&](CellInfo *cell) { +            return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; +        }; + +        es.reset(); + +        for (auto net : sorted(ctx->nets)) { +            NetInfo *ni = net.second; +            if (ni->driver.cell == nullptr) +                continue; +            if (ni->users.empty()) +                continue; +            if (cell_locs.at(ni->driver.cell->name).global) +                continue; +            // Find the bounds of the net in this axis, and the ports that correspond to these bounds +            PortRef *lbport = nullptr, *ubport = nullptr; +            int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min(); +            foreach_port(ni, [&](PortRef &port, int user_idx) { +                int pos = cell_pos(port.cell); +                if (pos < lbpos) { +                    lbpos = pos; +                    lbport = &port; +                } +                if (pos > ubpos) { +                    ubpos = pos; +                    ubport = &port; +                } +            }); +            NPNR_ASSERT(lbport != nullptr); +            NPNR_ASSERT(ubport != nullptr); + +            auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) { +                if (eqn.cell->udata == dont_solve) +                    return; +                int row = eqn.cell->udata; +                int v_pos = cell_pos(var.cell); +                if (var.cell->udata != dont_solve) { +                    es.add_coeff(row, var.cell->udata, weight); +                } else { +                    es.add_rhs(row, -v_pos * weight); +                } +                if (cell_offsets.count(var.cell->name)) { +                    es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second +                                            : cell_offsets.at(var.cell->name).first) * +                                            weight); +                } +            }; + +            // Add all relevant connections to the matrix +            foreach_port(ni, [&](PortRef &port, int user_idx) { +                int this_pos = cell_pos(port.cell); +                auto process_arc = [&](PortRef *other) { +                    if (other == &port) +                        return; +                    int o_pos = cell_pos(other->cell); +                    double weight = 1.0 / (ni->users.size() * std::max<double>(1, std::abs(o_pos - this_pos))); + +                    if (user_idx != -1 && net_crit.count(ni->name)) { +                        auto &nc = net_crit.at(ni->name); +                        if (user_idx < int(nc.criticality.size())) +                            weight *= (1.0 + cfg.timingWeight * +                                                     std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent)); +                    } + +                    // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, +                    // if the other end isn't fixed +                    stamp_equation(port, port, weight); +                    stamp_equation(port, *other, -weight); +                    stamp_equation(*other, *other, weight); +                    stamp_equation(*other, port, -weight); +                }; +                process_arc(lbport); +                process_arc(ubport); +            }); +        } +        if (iter != -1) { +            float alpha = cfg.alpha; +            for (size_t row = 0; row < solve_cells.size(); row++) { +                int l_pos = legal_pos(solve_cells.at(row)); +                int c_pos = cell_pos(solve_cells.at(row)); + +                double weight = alpha * iter / std::max<double>(1, std::abs(l_pos - c_pos)); +                // Add an arc from legalised to current position +                es.add_coeff(row, row, weight); +                es.add_rhs(row, weight * l_pos); +            } +        } +    } + +    // Build the system of equations for either X or Y +    void solve_equations(EquationSystem<double> &es, bool yaxis) +    { +        // Return the x or y position of a cell, depending on ydir +        auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; +        std::vector<double> vals; +        std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); +        es.solve(vals); +        for (size_t i = 0; i < vals.size(); i++) +            if (yaxis) { +                cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); +                cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i)))); +            } else { +                cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); +                cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); +            } +    } + +    // Compute HPWL +    wirelen_t total_hpwl() +    { +        wirelen_t hpwl = 0; +        for (auto net : sorted(ctx->nets)) { +            NetInfo *ni = net.second; +            if (ni->driver.cell == nullptr) +                continue; +            CellLocation &drvloc = cell_locs.at(ni->driver.cell->name); +            if (drvloc.global) +                continue; +            int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y; +            for (auto &user : ni->users) { +                CellLocation &usrloc = cell_locs.at(user.cell->name); +                xmin = std::min(xmin, usrloc.x); +                xmax = std::max(xmax, usrloc.x); +                ymin = std::min(ymin, usrloc.y); +                ymax = std::max(ymax, usrloc.y); +            } +            hpwl += (xmax - xmin) + (ymax - ymin); +        } +        return hpwl; +    } + +    // Strict placement legalisation, performed after the initial HeAP spreading +    void legalise_placement_strict(bool require_validity = false) +    { +        auto startt = std::chrono::high_resolution_clock::now(); + +        // Unbind all cells placed in this solution +        for (auto cell : sorted(ctx->cells)) { +            CellInfo *ci = cell.second; +            if (ci->bel != BelId() && (ci->udata != dont_solve || +                                       (chain_root.count(ci->name) && chain_root.at(ci->name)->udata != dont_solve))) +                ctx->unbindBel(ci->bel); +        } + +        // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using +        // the simple greedy largest-macro-first approach. +        std::priority_queue<std::pair<int, IdString>> remaining; +        for (auto cell : solve_cells) { +            remaining.emplace(chain_size[cell->name], cell->name); +        } +        int ripup_radius = 2; +        int total_iters = 0; +        while (!remaining.empty()) { +            auto top = remaining.top(); +            remaining.pop(); + +            CellInfo *ci = ctx->cells.at(top.second).get(); +            // Was now placed, ignore +            if (ci->bel != BelId()) +                continue; +            // log_info("   Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx)); +            int bt = std::get<0>(bel_types.at(ci->type)); +            auto &fb = fast_bels.at(bt); +            int radius = 0; +            int iter = 0; +            int iter_at_radius = 0; +            bool placed = false; +            BelId bestBel; +            int best_inp_len = std::numeric_limits<int>::max(); + +            total_iters++; +            if (total_iters > int(solve_cells.size())) { +                total_iters = 0; +                ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2); +            } + +            while (!placed) { + +                int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); +                int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).y - radius, 0); + +                iter++; +                iter_at_radius++; +                if (iter >= (10 * (radius + 1))) { +                    radius = std::min(std::max(max_x, max_y), radius + 1); +                    while (radius < std::max(max_x, max_y)) { +                        for (int x = std::max(0, cell_locs.at(ci->name).x - radius); +                             x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) { +                            if (x >= int(fb.size())) +                                break; +                            for (int y = std::max(0, cell_locs.at(ci->name).y - radius); +                                 y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) { +                                if (y >= int(fb.at(x).size())) +                                    break; +                                if (fb.at(x).at(y).size() > 0) +                                    goto notempty; +                            } +                        } +                        radius = std::min(std::max(max_x, max_y), radius + 1); +                    } +                notempty: +                    iter_at_radius = 0; +                    iter = 0; +                } +                if (nx < 0 || nx > max_x) +                    continue; +                if (ny < 0 || ny > max_y) +                    continue; + +                // ny = nearest_row_with_bel.at(bt).at(ny); +                // nx = nearest_col_with_bel.at(bt).at(nx); + +                if (nx >= int(fb.size())) +                    continue; +                if (ny >= int(fb.at(nx).size())) +                    continue; +                if (fb.at(nx).at(ny).empty()) +                    continue; + +                int need_to_explore = 2 * radius; + +                if (iter_at_radius >= need_to_explore && bestBel != BelId()) { +                    CellInfo *bound = ctx->getBoundBelCell(bestBel); +                    if (bound != nullptr) { +                        ctx->unbindBel(bound->bel); +                        remaining.emplace(chain_size[bound->name], bound->name); +                    } +                    ctx->bindBel(bestBel, ci, STRENGTH_WEAK); +                    placed = true; +                    Loc loc = ctx->getBelLocation(bestBel); +                    cell_locs[ci->name].x = loc.x; +                    cell_locs[ci->name].y = loc.y; +                    break; +                } + +                if (ci->constr_children.empty() && !ci->constr_abs_z) { +                    for (auto sz : fb.at(nx).at(ny)) { +                        if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) { +                            CellInfo *bound = ctx->getBoundBelCell(sz); +                            if (bound != nullptr) { +                                if (bound->constr_parent != nullptr || !bound->constr_children.empty() || +                                    bound->constr_abs_z) +                                    continue; +                                ctx->unbindBel(bound->bel); +                            } +                            ctx->bindBel(sz, ci, STRENGTH_WEAK); +                            if (require_validity && !ctx->isBelLocationValid(sz)) { +                                ctx->unbindBel(sz); +                                if (bound != nullptr) +                                    ctx->bindBel(sz, bound, STRENGTH_WEAK); +                            } else if (iter_at_radius < need_to_explore) { +                                ctx->unbindBel(sz); +                                if (bound != nullptr) +                                    ctx->bindBel(sz, bound, STRENGTH_WEAK); +                                int input_len = 0; +                                for (auto &port : ci->ports) { +                                    auto &p = port.second; +                                    if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr) +                                        continue; +                                    CellInfo *drv = p.net->driver.cell; +                                    auto drv_loc = cell_locs.find(drv->name); +                                    if (drv_loc == cell_locs.end()) +                                        continue; +                                    if (drv_loc->second.global) +                                        continue; +                                    input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny); +                                } +                                if (input_len < best_inp_len) { +                                    best_inp_len = input_len; +                                    bestBel = sz; +                                } +                                break; +                            } else { +                                if (bound != nullptr) +                                    remaining.emplace(chain_size[bound->name], bound->name); +                                Loc loc = ctx->getBelLocation(sz); +                                cell_locs[ci->name].x = loc.x; +                                cell_locs[ci->name].y = loc.y; +                                placed = true; +                                break; +                            } +                        } +                    } +                } else { +                    for (auto sz : fb.at(nx).at(ny)) { +                        Loc loc = ctx->getBelLocation(sz); +                        if (ci->constr_abs_z && loc.z != ci->constr_z) +                            continue; +                        std::vector<std::pair<CellInfo *, BelId>> targets; +                        std::vector<std::pair<BelId, CellInfo *>> swaps_made; +                        std::queue<std::pair<CellInfo *, Loc>> visit; +                        visit.emplace(ci, loc); +                        while (!visit.empty()) { +                            CellInfo *vc = visit.front().first; +                            NPNR_ASSERT(vc->bel == BelId()); +                            Loc ploc = visit.front().second; +                            visit.pop(); +                            BelId target = ctx->getBelByLocation(ploc); +                            CellInfo *bound; +                            if (target == BelId() || ctx->getBelType(target) != vc->type) +                                goto fail; +                            bound = ctx->getBoundBelCell(target); +                            // Chains cannot overlap +                            if (bound != nullptr) +                                if (bound->constr_z != bound->UNCONSTR || bound->constr_parent != nullptr || +                                    !bound->constr_children.empty() || bound->belStrength > STRENGTH_WEAK) +                                    goto fail; +                            targets.emplace_back(vc, target); +                            for (auto child : vc->constr_children) { +                                Loc cloc = ploc; +                                if (child->constr_x != child->UNCONSTR) +                                    cloc.x += child->constr_x; +                                if (child->constr_y != child->UNCONSTR) +                                    cloc.y += child->constr_y; +                                if (child->constr_z != child->UNCONSTR) +                                    cloc.z = child->constr_abs_z ? child->constr_z : (ploc.z + child->constr_z); +                                visit.emplace(child, cloc); +                            } +                        } + +                        for (auto &target : targets) { +                            CellInfo *bound = ctx->getBoundBelCell(target.second); +                            if (bound != nullptr) +                                ctx->unbindBel(target.second); +                            ctx->bindBel(target.second, target.first, STRENGTH_STRONG); +                            swaps_made.emplace_back(target.second, bound); +                        } + +                        for (auto &sm : swaps_made) { +                            if (!ctx->isBelLocationValid(sm.first)) +                                goto fail; +                        } + +                        if (false) { +                        fail: +                            for (auto &swap : swaps_made) { +                                ctx->unbindBel(swap.first); +                                if (swap.second != nullptr) +                                    ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK); +                            } +                            continue; +                        } +                        for (auto &target : targets) { +                            Loc loc = ctx->getBelLocation(target.second); +                            cell_locs[target.first->name].x = loc.x; +                            cell_locs[target.first->name].y = loc.y; +                            // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z); +                        } +                        for (auto &swap : swaps_made) { +                            if (swap.second != nullptr) +                                remaining.emplace(chain_size[swap.second->name], swap.second->name); +                        } + +                        placed = true; +                        break; +                    } +                } +            } +        } +        auto endt = std::chrono::high_resolution_clock::now(); +        sl_time += std::chrono::duration<float>(endt - startt).count(); +    } +    // Implementation of the cut-based spreading as described in the HeAP/SimPL papers +    static constexpr float beta = 0.9; + +    struct ChainExtent +    { +        int x0, y0, x1, y1; +    }; + +    struct SpreaderRegion +    { +        int id; +        int x0, y0, x1, y1; +        int cells, bels; +        bool overused() const +        { +            if (bels < 4) +                return cells > bels; +            else +                return cells > beta * bels; +        } +    }; + +    class CutSpreader +    { +      public: +        CutSpreader(HeAPPlacer *p, IdString beltype) +                : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) +        { +        } +        static int seq; +        void run() +        { +            auto startt = std::chrono::high_resolution_clock::now(); +            init(); +            find_overused_regions(); +            for (auto &r : regions) { +                if (merged_regions.count(r.id)) +                    continue; +#if 0 +                log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, +                         r.bels); +#endif +            } +            expand_regions(); +            std::queue<std::pair<int, bool>> workqueue; +#if 0 +            std::vector<std::pair<double, double>> orig; +            if (ctx->debug) +                for (auto c : p->solve_cells) +                    orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy); +#endif +            for (auto &r : regions) { +                if (merged_regions.count(r.id)) +                    continue; +#if 0 +                log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, +                         r.bels); +#endif +                workqueue.emplace(r.id, false); +                // cut_region(r, false); +            } +            while (!workqueue.empty()) { +                auto front = workqueue.front(); +                workqueue.pop(); +                auto &r = regions.at(front.first); +                if (r.cells == 0) +                    continue; +                auto res = cut_region(r, front.second); +                if (res) { +                    workqueue.emplace(res->first, !front.second); +                    workqueue.emplace(res->second, !front.second); +                } else { +                    // Try the other dir, in case stuck in one direction only +                    auto res2 = cut_region(r, !front.second); +                    if (res2) { +                        // log_info("RETRY SUCCESS\n"); +                        workqueue.emplace(res2->first, front.second); +                        workqueue.emplace(res2->second, front.second); +                    } +                } +            } +#if 0 +            if (ctx->debug) { +                std::ofstream sp("spread" + std::to_string(seq) + ".csv"); +                for (size_t i = 0; i < p->solve_cells.size(); i++) { +                    auto &c = p->solve_cells.at(i); +                    if (c->type != beltype) +                        continue; +                    sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl; +                } +                std::ofstream oc("cells" + std::to_string(seq) + ".csv"); +                for (size_t y = 0; y <= p->max_y; y++) { +                    for (size_t x = 0; x <= p->max_x; x++) { +                        oc << cells_at_location.at(x).at(y).size() << ", "; +                    } +                    oc << std::endl; +                } +                ++seq; +            } +#endif +            auto endt = std::chrono::high_resolution_clock::now(); +            p->cl_time += std::chrono::duration<float>(endt - startt).count(); +        } + +      private: +        HeAPPlacer *p; +        Context *ctx; +        IdString beltype; +        std::vector<std::vector<int>> occupancy; +        std::vector<std::vector<int>> groups; +        std::vector<std::vector<ChainExtent>> chaines; +        std::map<IdString, ChainExtent> cell_extents; + +        std::vector<std::vector<std::vector<BelId>>> &fb; + +        std::vector<SpreaderRegion> regions; +        std::unordered_set<int> merged_regions; +        // Cells at a location, sorted by real (not integer) x and y +        std::vector<std::vector<std::vector<CellInfo *>>> cells_at_location; + +        int occ_at(int x, int y) { return occupancy.at(x).at(y); } + +        int bels_at(int x, int y) +        { +            if (x >= int(fb.size()) || y >= int(fb.at(x).size())) +                return 0; +            return int(fb.at(x).at(y).size()); +        } + +        void init() +        { +            occupancy.resize(p->max_x + 1, std::vector<int>(p->max_y + 1, 0)); +            groups.resize(p->max_x + 1, std::vector<int>(p->max_y + 1, -1)); +            chaines.resize(p->max_x + 1, std::vector<ChainExtent>(p->max_y + 1)); +            cells_at_location.resize(p->max_x + 1, std::vector<std::vector<CellInfo *>>(p->max_y + 1)); +            for (int x = 0; x <= p->max_x; x++) +                for (int y = 0; y <= p->max_y; y++) { +                    occupancy.at(x).at(y) = 0; +                    groups.at(x).at(y) = -1; +                    chaines.at(x).at(y) = {x, y, x, y}; +                } + +            auto set_chain_ext = [&](IdString cell, int x, int y) { +                if (!cell_extents.count(cell)) +                    cell_extents[cell] = {x, y, x, y}; +                else { +                    cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x); +                    cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y); +                    cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x); +                    cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y); +                } +            }; + +            for (auto &cell : p->cell_locs) { +                if (ctx->cells.at(cell.first)->type != beltype) +                    continue; +                if (ctx->cells.at(cell.first)->belStrength > STRENGTH_STRONG) +                    continue; +                occupancy.at(cell.second.x).at(cell.second.y)++; +                // Compute ultimate extent of each chain root +                if (p->chain_root.count(cell.first)) { +                    set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y); +                } else if (!ctx->cells.at(cell.first)->constr_children.empty()) { +                    set_chain_ext(cell.first, cell.second.x, cell.second.y); +                } +            } +            for (auto &cell : p->cell_locs) { +                if (ctx->cells.at(cell.first)->type != beltype) +                    continue; +                // Transfer chain extents to the actual chaines structure +                ChainExtent *ce = nullptr; +                if (p->chain_root.count(cell.first)) +                    ce = &(cell_extents.at(p->chain_root.at(cell.first)->name)); +                else if (!ctx->cells.at(cell.first)->constr_children.empty()) +                    ce = &(cell_extents.at(cell.first)); +                if (ce) { +                    auto &lce = chaines.at(cell.second.x).at(cell.second.y); +                    lce.x0 = std::min(lce.x0, ce->x0); +                    lce.y0 = std::min(lce.y0, ce->y0); +                    lce.x1 = std::max(lce.x1, ce->x1); +                    lce.y1 = std::max(lce.y1, ce->y1); +                } +            } +            for (auto cell : p->solve_cells) { +                if (cell->type != beltype) +                    continue; +                cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); +            } +        } +        void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee) +        { +            // Prevent grow_region from recursing while doing this +            for (int x = mergee.x0; x <= mergee.x1; x++) +                for (int y = mergee.y0; y <= mergee.y1; y++) { +                    // log_info("%d %d\n", groups.at(x).at(y), mergee.id); +                    NPNR_ASSERT(groups.at(x).at(y) == mergee.id); +                    groups.at(x).at(y) = merged.id; +                    merged.cells += occ_at(x, y); +                    merged.bels += bels_at(x, y); +                } +            merged_regions.insert(mergee.id); +            grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); +        } + +        void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false) +        { +            // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); +            if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) +                return; +            int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1; +            r.x0 = std::min(r.x0, x0); +            r.y0 = std::min(r.y0, y0); +            r.x1 = std::max(r.x1, x1); +            r.y1 = std::max(r.y1, y1); + +            auto process_location = [&](int x, int y) { +                // Merge with any overlapping regions +                if (groups.at(x).at(y) == -1) { +                    r.bels += bels_at(x, y); +                    r.cells += occ_at(x, y); +                } +                if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) +                    merge_regions(r, regions.at(groups.at(x).at(y))); +                groups.at(x).at(y) = r.id; +                // Grow to cover any chains +                auto &chaine = chaines.at(x).at(y); +                grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1); +            }; +            for (int x = r.x0; x < old_x0; x++) +                for (int y = r.y0; y <= r.y1; y++) +                    process_location(x, y); +            for (int x = old_x1 + 1; x <= x1; x++) +                for (int y = r.y0; y <= r.y1; y++) +                    process_location(x, y); +            for (int y = r.y0; y < old_y0; y++) +                for (int x = r.x0; x <= r.x1; x++) +                    process_location(x, y); +            for (int y = old_y1 + 1; y <= r.y1; y++) +                for (int x = r.x0; x <= r.x1; x++) +                    process_location(x, y); +        } + +        void find_overused_regions() +        { +            for (int x = 0; x <= p->max_x; x++) +                for (int y = 0; y <= p->max_y; y++) { +                    // Either already in a group, or not overutilised. Ignore +                    if (groups.at(x).at(y) != -1 || (occ_at(x, y) <= bels_at(x, y))) +                        continue; +                    // log_info("%d %d %d\n", x, y, occ_at(x, y)); +                    int id = int(regions.size()); +                    groups.at(x).at(y) = id; +                    SpreaderRegion reg; +                    reg.id = id; +                    reg.x0 = reg.x1 = x; +                    reg.y0 = reg.y1 = y; +                    reg.bels = bels_at(x, y); +                    reg.cells = occ_at(x, y); +                    // Make sure we cover carries, etc +                    grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); + +                    bool expanded = true; +                    while (expanded) { +                        expanded = false; +                        // Keep trying expansion in x and y, until we find no over-occupancy cells +                        // or hit grouped cells + +                        // First try expanding in x +                        if (reg.x1 < p->max_x) { +                            bool over_occ_x = false; +                            for (int y1 = reg.y0; y1 <= reg.y1; y1++) { +                                if (occ_at(reg.x1 + 1, y1) > bels_at(reg.x1 + 1, y1)) { +                                    // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1), +                                    // bels_at(reg.x1 + 1, y1)); +                                    over_occ_x = true; +                                    break; +                                } +                            } +                            if (over_occ_x) { +                                expanded = true; +                                grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); +                            } +                        } + +                        if (reg.y1 < p->max_y) { +                            bool over_occ_y = false; +                            for (int x1 = reg.x0; x1 <= reg.x1; x1++) { +                                if (occ_at(x1, reg.y1 + 1) > bels_at(x1, reg.y1 + 1)) { +                                    // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1), +                                    // bels_at(x1, reg.y1 + 1)); +                                    over_occ_y = true; +                                    break; +                                } +                            } +                            if (over_occ_y) { +                                expanded = true; +                                grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); +                            } +                        } +                    } +                    regions.push_back(reg); +                } +        } + +        void expand_regions() +        { +            std::queue<int> overu_regions; +            for (auto &r : regions) { +                if (!merged_regions.count(r.id) && r.overused()) +                    overu_regions.push(r.id); +            } +            while (!overu_regions.empty()) { +                int rid = overu_regions.front(); +                overu_regions.pop(); +                if (merged_regions.count(rid)) +                    continue; +                auto ® = regions.at(rid); +                while (reg.overused()) { +                    bool changed = false; +                    if (reg.x0 > 0) { +                        grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); +                        changed = true; +                        if (!reg.overused()) +                            break; +                    } +                    if (reg.x1 < p->max_x) { +                        grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); +                        changed = true; +                        if (!reg.overused()) +                            break; +                    } +                    if (reg.y0 > 0) { +                        grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); +                        changed = true; +                        if (!reg.overused()) +                            break; +                    } +                    if (reg.y1 < p->max_y) { +                        grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); +                        changed = true; +                        if (!reg.overused()) +                            break; +                    } +                    if (!changed) { +                        if (reg.cells > reg.bels) +                            log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, +                                      reg.x1, reg.y1, reg.cells, beltype.c_str(ctx)); +                        else +                            break; +                    } +                } +            } +        } + +        // Implementation of the recursive cut-based spreading as described in the HeAP paper +        // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir + +        std::vector<CellInfo *> cut_cells; + +        boost::optional<std::pair<int, int>> cut_region(SpreaderRegion &r, bool dir) +        { +            cut_cells.clear(); +            auto &cal = cells_at_location; +            int total_cells = 0, total_bels = 0; +            for (int x = r.x0; x <= r.x1; x++) { +                for (int y = r.y0; y <= r.y1; y++) { +                    std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); +                    total_bels += bels_at(x, y); +                } +            } +            for (auto &cell : cut_cells) { +                total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; +            } +            std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { +                return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) +                           : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); +            }); + +            if (cut_cells.size() < 2) +                return {}; +            // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut +            int pivot_cells = 0; +            int pivot = 0; +            for (auto &cell : cut_cells) { +                pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; +                if (pivot_cells >= total_cells / 2) +                    break; +                pivot++; +            } +            if (pivot == int(cut_cells.size())) +                pivot = int(cut_cells.size()) - 1; +            // log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); + +            // Find the clearance required either side of the pivot +            int clearance_l = 0, clearance_r = 0; +            for (size_t i = 0; i < cut_cells.size(); i++) { +                int size; +                if (cell_extents.count(cut_cells.at(i)->name)) { +                    auto &ce = cell_extents.at(cut_cells.at(i)->name); +                    size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1); +                } else { +                    size = 1; +                } +                if (int(i) < pivot) +                    clearance_l = std::max(clearance_l, size); +                else +                    clearance_r = std::max(clearance_r, size); +            } +            // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains +            // still fit + +            // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any +            // bels of the appropriate type +            int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1; +            while (trimmed_l < (dir ? r.y1 : r.x1)) { +                bool have_bels = false; +                for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) +                    if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i) > 0) { +                        have_bels = true; +                        break; +                    } +                if (have_bels) +                    break; +                trimmed_l++; +            } +            while (trimmed_r > (dir ? r.y0 : r.x0)) { +                bool have_bels = false; +                for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) +                    if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i) > 0) { +                        have_bels = true; +                        break; +                    } +                if (have_bels) +                    break; +                trimmed_r--; +            } +            // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); +            if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) +                return {}; +            // Now find the initial target cut that minimises utilisation imbalance, whilst +            // meeting the clearance requirements for any large macros +            int left_cells = pivot_cells, right_cells = total_cells - pivot_cells; +            int left_bels = 0, right_bels = total_bels; +            int best_tgt_cut = -1; +            double best_deltaU = std::numeric_limits<double>::max(); +            std::pair<int, int> target_cut_bels; +            for (int i = trimmed_l; i <= trimmed_r; i++) { +                int slither_bels = 0; +                for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { +                    slither_bels += dir ? bels_at(j, i) : bels_at(i, j); +                } +                left_bels += slither_bels; +                right_bels -= slither_bels; +                if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { +                    // Solution is potentially valid +                    double aU = +                            std::abs(double(left_cells) / double(left_bels) - double(right_cells) / double(right_bels)); +                    if (aU < best_deltaU) { +                        best_deltaU = aU; +                        best_tgt_cut = i; +                        target_cut_bels = std::make_pair(left_bels, right_bels); +                    } +                } +            } +            if (best_tgt_cut == -1) +                return {}; +            left_bels = target_cut_bels.first; +            right_bels = target_cut_bels.second; +            // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, +            // right_cells, right_bels); + +            // Peturb the source cut to eliminate overutilisation +            while (pivot > 0 && (double(left_cells) / double(left_bels) > double(right_cells) / double(right_bels))) { +                auto &move_cell = cut_cells.at(pivot); +                int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; +                left_cells -= size; +                right_cells += size; +                pivot--; +            } +            while (pivot < int(cut_cells.size()) - 1 && +                   (double(left_cells) / double(left_bels) < double(right_cells) / double(right_bels))) { +                auto &move_cell = cut_cells.at(pivot + 1); +                int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; +                left_cells += size; +                right_cells -= size; +                pivot++; +            } +            // log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, +            // right_bels); +            // Split regions into bins, and then spread cells by linear interpolation within those bins +            auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { +                int N = cells_end - cells_start; +                if (N <= 2) { +                    for (int i = cells_start; i < cells_end; i++) { +                        auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy +                                        : p->cell_locs.at(cut_cells.at(i)->name).rawx; +                        pos = area_l + i * ((area_r - area_l) / N); +                    } +                    return; +                } +                // Split region into up to 10 (K) bins +                int K = std::min<int>(N, 10); +                std::vector<std::pair<int, double>> bin_bounds; // [(cell start, area start)] +                bin_bounds.emplace_back(cells_start, area_l); +                for (int i = 1; i < K; i++) +                    bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K); +                bin_bounds.emplace_back(cells_end, area_r + 0.99); +                for (int i = 0; i < K; i++) { +                    auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); +                    double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy +                                           : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; +                    double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy +                                            : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; +                    double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left); +                    for (int j = bl.first; j < br.first; j++) { +                        auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy +                                        : p->cell_locs.at(cut_cells.at(j)->name).rawx; +                        NPNR_ASSERT(pos >= orig_left && pos <= orig_right); +                        pos = bl.second + m * (pos - orig_left); +                        // log("[%f, %f] -> [%f, %f]: %f -> %f\n", orig_left, orig_right, bl.second, br.second, +                        // orig_pos, pos); +                    } +                } +            }; +            spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut); +            spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r); +            // Update various data structures +            for (int x = r.x0; x <= r.x1; x++) +                for (int y = r.y0; y <= r.y1; y++) { +                    cells_at_location.at(x).at(y).clear(); +                } +            for (auto cell : cut_cells) { +                auto &cl = p->cell_locs.at(cell->name); +                cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx))); +                cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy))); +                cells_at_location.at(cl.x).at(cl.y).push_back(cell); +                // log_info("spread pos %d %d\n", cl.x, cl.y); +            } +            SpreaderRegion rl, rr; +            rl.id = int(regions.size()); +            rl.x0 = r.x0; +            rl.y0 = r.y0; +            rl.x1 = dir ? r.x1 : best_tgt_cut; +            rl.y1 = dir ? best_tgt_cut : r.y1; +            rl.cells = left_cells; +            rl.bels = left_bels; +            rr.id = int(regions.size()) + 1; +            rr.x0 = dir ? r.x0 : (best_tgt_cut + 1); +            rr.y0 = dir ? (best_tgt_cut + 1) : r.y0; +            rr.x1 = r.x1; +            rr.y1 = r.y1; +            rr.cells = right_cells; +            rr.bels = right_bels; +            regions.push_back(rl); +            regions.push_back(rr); +            for (int x = rl.x0; x <= rl.x1; x++) +                for (int y = rl.y0; y <= rl.y1; y++) +                    groups.at(x).at(y) = rl.id; +            for (int x = rr.x0; x <= rr.x1; x++) +                for (int y = rr.y0; y <= rr.y1; y++) +                    groups.at(x).at(y) = rr.id; +            return std::make_pair(rl.id, rr.id); +        }; +    }; +    typedef decltype(CellInfo::udata) cell_udata_t; +    cell_udata_t dont_solve = std::numeric_limits<cell_udata_t>::max(); +}; +int HeAPPlacer::CutSpreader::seq = 0; + +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); } + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) : Settings(ctx) +{ +    alpha = get<float>("placerHeap/alpha", 0.1); +    criticalityExponent = get<int>("placerHeap/criticalityExponent", 2); +    timingWeight = get<int>("placerHeap/timingWeight", 10); +} + +NEXTPNR_NAMESPACE_END + +#else + +#include "log.h" +#include "nextpnr.h" +#include "placer_heap.h" + +NEXTPNR_NAMESPACE_BEGIN +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) +{ +    log_error("nextpnr was built without the HeAP placer\n"); +    return false; +} + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) : Settings(ctx) {} + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/placer_heap.h b/common/placer_heap.h new file mode 100644 index 00000000..841aa0d9 --- /dev/null +++ b/common/placer_heap.h @@ -0,0 +1,47 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2019  David Shah <david@symbioticeda.com> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + *  [[cite]] HeAP + *  Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + *  https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + *  [[cite]] SimPL + *  SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + *  http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + */ + +#ifndef PLACER_HEAP_H +#define PLACER_HEAP_H +#include "nextpnr.h" +#include "settings.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct PlacerHeapCfg : public Settings +{ +    PlacerHeapCfg(Context *ctx); + +    float alpha; +    float criticalityExponent; +    float timingWeight; + +    std::unordered_set<IdString> ioBufTypes; +}; + +extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg); +NEXTPNR_NAMESPACE_END +#endif diff --git a/common/pybindings.cc b/common/pybindings.cc index 6cae889d..60f87e27 100644 --- a/common/pybindings.cc +++ b/common/pybindings.cc @@ -23,8 +23,10 @@  #include "pybindings.h"  #include "arch_pybindings.h"  #include "jsonparse.h" +#include "log.h"  #include "nextpnr.h" +#include <boost/filesystem.hpp>  #include <fstream>  #include <memory>  #include <signal.h> @@ -87,7 +89,26 @@ BOOST_PYTHON_MODULE(MODULE_NAME)      using namespace PythonConversion; +    enum_<GraphicElement::type_t>("GraphicElementType") +            .value("TYPE_NONE", GraphicElement::TYPE_NONE) +            .value("TYPE_LINE", GraphicElement::TYPE_LINE) +            .value("TYPE_ARROW", GraphicElement::TYPE_ARROW) +            .value("TYPE_BOX", GraphicElement::TYPE_BOX) +            .value("TYPE_CIRCLE", GraphicElement::TYPE_CIRCLE) +            .value("TYPE_LABEL", GraphicElement::TYPE_LABEL) +            .export_values(); + +    enum_<GraphicElement::style_t>("GraphicElementStyle") +            .value("STYLE_GRID", GraphicElement::STYLE_GRID) +            .value("STYLE_FRAME", GraphicElement::STYLE_FRAME) +            .value("STYLE_HIDDEN", GraphicElement::STYLE_HIDDEN) +            .value("STYLE_INACTIVE", GraphicElement::STYLE_INACTIVE) +            .value("STYLE_ACTIVE", GraphicElement::STYLE_ACTIVE) +            .export_values(); +      class_<GraphicElement>("GraphicElement") +            .def(init<GraphicElement::type_t, GraphicElement::style_t, float, float, float, float, float>( +                    (args("type"), "style", "x1", "y1", "x2", "y2", "z")))              .def_readwrite("type", &GraphicElement::type)              .def_readwrite("x1", &GraphicElement::x1)              .def_readwrite("y1", &GraphicElement::y1) @@ -104,9 +125,16 @@ BOOST_PYTHON_MODULE(MODULE_NAME)      typedef std::unordered_map<IdString, std::string> AttrMap;      typedef std::unordered_map<IdString, PortInfo> PortMap;      typedef std::unordered_map<IdString, IdString> PinMap; +    typedef std::unordered_map<IdString, std::unique_ptr<Region>> RegionMap;      class_<BaseCtx, BaseCtx *, boost::noncopyable>("BaseCtx", no_init); +    auto loc_cls = class_<Loc>("Loc") +                           .def(init<int, int, int>()) +                           .def_readwrite("x", &Loc::x) +                           .def_readwrite("y", &Loc::y) +                           .def_readwrite("z", &Loc::z); +      auto ci_cls = class_<ContextualWrapper<CellInfo &>>("CellInfo", no_init);      readwrite_wrapper<CellInfo &, decltype(&CellInfo::name), &CellInfo::name, conv_to_str<IdString>,                        conv_from_str<IdString>>::def_wrap(ci_cls, "name"); @@ -135,6 +163,8 @@ BOOST_PYTHON_MODULE(MODULE_NAME)      typedef std::vector<PortRef> PortRefVector;      typedef std::unordered_map<WireId, PipMap> WireMap; +    typedef std::unordered_set<BelId> BelSet; +    typedef std::unordered_set<WireId> WireSet;      auto ni_cls = class_<ContextualWrapper<NetInfo &>>("NetInfo", no_init);      readwrite_wrapper<NetInfo &, decltype(&NetInfo::name), &NetInfo::name, conv_to_str<IdString>, @@ -163,10 +193,25 @@ BOOST_PYTHON_MODULE(MODULE_NAME)      def("parse_json", parse_json_shim);      def("load_design", load_design_shim, return_value_policy<manage_new_object>()); +    auto region_cls = class_<ContextualWrapper<Region &>>("Region", no_init); +    readwrite_wrapper<Region &, decltype(&Region::name), &Region::name, conv_to_str<IdString>, +                      conv_from_str<IdString>>::def_wrap(region_cls, "name"); +    readwrite_wrapper<Region &, decltype(&Region::constr_bels), &Region::constr_bels, pass_through<bool>, +                      pass_through<bool>>::def_wrap(region_cls, "constr_bels"); +    readwrite_wrapper<Region &, decltype(&Region::constr_wires), &Region::constr_wires, pass_through<bool>, +                      pass_through<bool>>::def_wrap(region_cls, "constr_bels"); +    readwrite_wrapper<Region &, decltype(&Region::constr_pips), &Region::constr_pips, pass_through<bool>, +                      pass_through<bool>>::def_wrap(region_cls, "constr_pips"); +    readonly_wrapper<Region &, decltype(&Region::bels), &Region::bels, wrap_context<BelSet &>>::def_wrap(region_cls, +                                                                                                         "bels"); +    readonly_wrapper<Region &, decltype(&Region::wires), &Region::wires, wrap_context<WireSet &>>::def_wrap(region_cls, +                                                                                                            "wires"); +      WRAP_MAP(AttrMap, pass_through<std::string>, "AttrMap");      WRAP_MAP(PortMap, wrap_context<PortInfo &>, "PortMap");      WRAP_MAP(PinMap, conv_to_str<IdString>, "PinMap");      WRAP_MAP(WireMap, wrap_context<PipMap &>, "WireMap"); +    WRAP_MAP_UPTR(RegionMap, "RegionMap");      WRAP_VECTOR(PortRefVector, wrap_context<PortRef &>); @@ -190,8 +235,14 @@ void init_python(const char *executable, bool first)              PyImport_AppendInittab(TOSTRING(MODULE_NAME), PYINIT_MODULE_NAME);          Py_SetProgramName(program);          Py_Initialize(); -        if (first) -            PyImport_ImportModule(TOSTRING(MODULE_NAME)); + +        // Add cwd to Python's search path so `import` can be used in user scripts +        boost::filesystem::path cwd = boost::filesystem::absolute("./").normalize(); +        PyObject *sys_path = PySys_GetObject("path"); +        PyList_Insert(sys_path, 0, PyUnicode_FromString(cwd.string().c_str())); + +        PyImport_ImportModule(TOSTRING(MODULE_NAME)); +        PyRun_SimpleString("from " TOSTRING(MODULE_NAME) " import *");      } catch (boost::python::error_already_set const &) {          // Parse and output the exception          std::string perror_str = parse_python_exception(); @@ -217,12 +268,15 @@ void execute_python_file(const char *python_file)              fprintf(stderr, "Fatal error: file not found %s\n", python_file);              exit(1);          } -        PyRun_SimpleFile(fp, python_file); +        int result = PyRun_SimpleFile(fp, python_file);          fclose(fp); +        if (result == -1) { +            log_error("Error occurred while executing Python script %s\n", python_file); +        }      } catch (boost::python::error_already_set const &) {          // Parse and output the exception          std::string perror_str = parse_python_exception(); -        std::cout << "Error in Python: " << perror_str << std::endl; +        log_error("Error in Python: %s\n", perror_str.c_str());      }  } diff --git a/common/pycontainers.h b/common/pycontainers.h index 70f69c51..5de2f6d2 100644 --- a/common/pycontainers.h +++ b/common/pycontainers.h @@ -345,6 +345,12 @@ template <typename T, typename value_conv> struct map_wrapper          std::terminate();      } +    static bool contains(wrapped_map &x, std::string const &i) +    { +        K k = PythonConversion::string_converter<K>().from_str(x.ctx, i); +        return x.base.count(k); +    } +      static void wrap(const char *map_name, const char *kv_name, const char *kv_iter_name, const char *iter_name)      {          map_pair_wrapper<typename KV::first_type, typename KV::second_type, value_conv>::wrap(kv_name, kv_iter_name); @@ -353,6 +359,7 @@ template <typename T, typename value_conv> struct map_wrapper          class_<wrapped_map>(map_name, no_init)                  .def("__iter__", rw::iter)                  .def("__len__", len) +                .def("__contains__", contains)                  .def("__getitem__", get)                  .def("__setitem__", set, with_custodian_and_ward<1, 2>());      } @@ -465,6 +472,12 @@ template <typename T> struct map_wrapper_uptr          std::terminate();      } +    static bool contains(wrapped_map &x, std::string const &i) +    { +        K k = PythonConversion::string_converter<K>().from_str(x.ctx, i); +        return x.base.count(k); +    } +      static void wrap(const char *map_name, const char *kv_name, const char *kv_iter_name, const char *iter_name)      {          map_pair_wrapper_uptr<typename KV::first_type, typename KV::second_type>::wrap(kv_name, kv_iter_name); @@ -473,6 +486,7 @@ template <typename T> struct map_wrapper_uptr          class_<wrapped_map>(map_name, no_init)                  .def("__iter__", rw::iter)                  .def("__len__", len) +                .def("__contains__", contains)                  .def("__getitem__", get)                  .def("__setitem__", set, with_custodian_and_ward<1, 2>());      } diff --git a/common/pywrappers.h b/common/pywrappers.h index 4e463afd..1d970985 100644 --- a/common/pywrappers.h +++ b/common/pywrappers.h @@ -155,11 +155,15 @@ template <typename Class, typename FuncT, FuncT fn, typename rv_conv> struct fn_      using class_type = typename WrapIfNotContext<Class>::maybe_wrapped_t;      using conv_result_type = typename rv_conv::ret_type; -    static conv_result_type wrapped_fn(class_type &cls) +    static object wrapped_fn(class_type &cls)      {          Context *ctx = get_ctx<Class>(cls);          Class &base = get_base<Class>(cls); -        return rv_conv()(ctx, (base.*fn)()); +        try { +            return object(rv_conv()(ctx, (base.*fn)())); +        } catch (bad_wrap &) { +            return object(); +        }      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } @@ -172,11 +176,15 @@ template <typename Class, typename FuncT, FuncT fn, typename rv_conv, typename a      using conv_result_type = typename rv_conv::ret_type;      using conv_arg1_type = typename arg1_conv::arg_type; -    static conv_result_type wrapped_fn(class_type &cls, conv_arg1_type arg1) +    static object wrapped_fn(class_type &cls, conv_arg1_type arg1)      {          Context *ctx = get_ctx<Class>(cls);          Class &base = get_base<Class>(cls); -        return rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1))); +        try { +            return object(rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1)))); +        } catch (bad_wrap &) { +            return object(); +        }      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } @@ -191,11 +199,15 @@ struct fn_wrapper_2a      using conv_arg1_type = typename arg1_conv::arg_type;      using conv_arg2_type = typename arg2_conv::arg_type; -    static conv_result_type wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2) +    static object wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2)      {          Context *ctx = get_ctx<Class>(cls);          Class &base = get_base<Class>(cls); -        return rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2))); +        try { +            return object(rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2)))); +        } catch (bad_wrap &) { +            return object(); +        }      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } @@ -212,11 +224,16 @@ struct fn_wrapper_3a      using conv_arg2_type = typename arg2_conv::arg_type;      using conv_arg3_type = typename arg3_conv::arg_type; -    static conv_result_type wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3) +    static object wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3)      {          Context *ctx = get_ctx<Class>(cls);          Class &base = get_base<Class>(cls); -        return rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3))); +        try { +            return object( +                    rv_conv()(ctx, (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3)))); +        } catch (bad_wrap &) { +            return object(); +        }      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } @@ -250,6 +267,11 @@ template <typename Class, typename FuncT, FuncT fn, typename arg1_conv> struct f      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, Ta a = arg("arg1")) +    { +        cls_.def(name, wrapped_fn, a); +    }  };  // Two parameters, one return @@ -267,9 +289,14 @@ template <typename Class, typename FuncT, FuncT fn, typename arg1_conv, typename      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, const Ta &a) +    { +        cls_.def(name, wrapped_fn, a); +    }  }; -// Three parameters, one return +// Three parameters, no return  template <typename Class, typename FuncT, FuncT fn, typename arg1_conv, typename arg2_conv, typename arg3_conv>  struct fn_wrapper_3a_v  { @@ -286,6 +313,98 @@ struct fn_wrapper_3a_v      }      template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, const Ta &a) +    { +        cls_.def(name, wrapped_fn, a); +    } +}; + +// Four parameters, no return +template <typename Class, typename FuncT, FuncT fn, typename arg1_conv, typename arg2_conv, typename arg3_conv, +          typename arg4_conv> +struct fn_wrapper_4a_v +{ +    using class_type = typename WrapIfNotContext<Class>::maybe_wrapped_t; +    using conv_arg1_type = typename arg1_conv::arg_type; +    using conv_arg2_type = typename arg2_conv::arg_type; +    using conv_arg3_type = typename arg3_conv::arg_type; +    using conv_arg4_type = typename arg4_conv::arg_type; + +    static void wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3, +                           conv_arg4_type arg4) +    { +        Context *ctx = get_ctx<Class>(cls); +        Class &base = get_base<Class>(cls); +        return (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3), +                          arg4_conv()(ctx, arg4)); +    } + +    template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, const Ta &a) +    { +        cls_.def(name, wrapped_fn, a); +    } +}; + +// Five parameters, no return +template <typename Class, typename FuncT, FuncT fn, typename arg1_conv, typename arg2_conv, typename arg3_conv, +          typename arg4_conv, typename arg5_conv> +struct fn_wrapper_5a_v +{ +    using class_type = typename WrapIfNotContext<Class>::maybe_wrapped_t; +    using conv_arg1_type = typename arg1_conv::arg_type; +    using conv_arg2_type = typename arg2_conv::arg_type; +    using conv_arg3_type = typename arg3_conv::arg_type; +    using conv_arg4_type = typename arg4_conv::arg_type; +    using conv_arg5_type = typename arg5_conv::arg_type; + +    static void wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3, +                           conv_arg4_type arg4, conv_arg5_type arg5) +    { +        Context *ctx = get_ctx<Class>(cls); +        Class &base = get_base<Class>(cls); +        return (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3), +                          arg4_conv()(ctx, arg4), arg5_conv()(ctx, arg5)); +    } + +    template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, const Ta &a) +    { +        cls_.def(name, wrapped_fn, a); +    } +}; + +// Six parameters, no return +template <typename Class, typename FuncT, FuncT fn, typename arg1_conv, typename arg2_conv, typename arg3_conv, +          typename arg4_conv, typename arg5_conv, typename arg6_conv> +struct fn_wrapper_6a_v +{ +    using class_type = typename WrapIfNotContext<Class>::maybe_wrapped_t; +    using conv_arg1_type = typename arg1_conv::arg_type; +    using conv_arg2_type = typename arg2_conv::arg_type; +    using conv_arg3_type = typename arg3_conv::arg_type; +    using conv_arg4_type = typename arg4_conv::arg_type; +    using conv_arg5_type = typename arg5_conv::arg_type; +    using conv_arg6_type = typename arg6_conv::arg_type; + +    static void wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3, +                           conv_arg4_type arg4, conv_arg5_type arg5, conv_arg6_type arg6) +    { +        Context *ctx = get_ctx<Class>(cls); +        Class &base = get_base<Class>(cls); +        return (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3), +                          arg4_conv()(ctx, arg4), arg5_conv()(ctx, arg5), arg6_conv()(ctx, arg6)); +    } + +    template <typename WrapCls> static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } + +    template <typename WrapCls, typename Ta> static void def_wrap(WrapCls cls_, const char *name, const Ta &a) +    { +        cls_.def(name, wrapped_fn, a); +    }  };  // Wrapped getter diff --git a/common/router1.cc b/common/router1.cc index cbc0df90..28a422c8 100644 --- a/common/router1.cc +++ b/common/router1.cc @@ -17,6 +17,7 @@   *   */ +#include <chrono>  #include <cmath>  #include <queue> @@ -752,6 +753,7 @@ bool router1(Context *ctx, const Router1Cfg &cfg)          log_break();          log_info("Routing..\n");          ctx->lock(); +        auto rstart = std::chrono::high_resolution_clock::now();          log_info("Setting up routing queue.\n"); @@ -803,7 +805,9 @@ bool router1(Context *ctx, const Router1Cfg &cfg)                   router.arcs_with_ripup - last_arcs_with_ripup, router.arcs_without_ripup - last_arcs_without_ripup,                   int(router.arc_queue.size()));          log_info("Routing complete.\n"); +        auto rend = std::chrono::high_resolution_clock::now();          ctx->yield(); +        log_info("Route time %.02fs\n", std::chrono::duration<float>(rend - rstart).count());  #ifndef NDEBUG          router.check(); diff --git a/common/settings.h b/common/settings.h index 0c4a67db..b57947c9 100644 --- a/common/settings.h +++ b/common/settings.h @@ -45,19 +45,30 @@ class Settings          return defaultValue;      } -    template <typename T> void set(const char *name, T value) -    { -        IdString id = ctx->id(name); -        auto pair = ctx->settings.emplace(id, std::to_string(value)); -        if (!pair.second) { -            ctx->settings[pair.first->first] = value; -        } -    } +    template <typename T> void set(const char *name, T value);    private:      Context *ctx;  }; +template <typename T> inline void Settings::set(const char *name, T value) +{ +    IdString id = ctx->id(name); +    auto pair = ctx->settings.emplace(id, std::to_string(value)); +    if (!pair.second) { +        ctx->settings[pair.first->first] = value; +    } +} + +template <> inline void Settings::set<std::string>(const char *name, std::string value) +{ +    IdString id = ctx->id(name); +    auto pair = ctx->settings.emplace(id, value); +    if (!pair.second) { +        ctx->settings[pair.first->first] = value; +    } +} +  NEXTPNR_NAMESPACE_END  #endif // SETTINGS_H diff --git a/common/timing.cc b/common/timing.cc index 88ab14c2..2ce9eea3 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -86,6 +86,7 @@ struct CriticalPath  };  typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap; +typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;  struct Timing  { @@ -95,6 +96,7 @@ struct Timing      delay_t min_slack;      CriticalPathMap *crit_path;      DelayFrequency *slack_histogram; +    NetCriticalityMap *net_crit;      IdString async_clock;      struct TimingData @@ -105,13 +107,15 @@ struct Timing          unsigned max_path_length = 0;          delay_t min_remaining_budget;          bool false_startpoint = false; +        std::vector<delay_t> min_required;          std::unordered_map<ClockEvent, delay_t> arrival_time;      };      Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr, -           DelayFrequency *slack_histogram = nullptr) +           DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr)              : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->target_freq), -              crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$")) +              crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit), +              async_clock(ctx->id("$async$"))      {      } @@ -221,7 +225,7 @@ struct Timing          }          // Sanity check to ensure that all ports where fanins were recorded were indeed visited -        if (!port_fanin.empty()) { +        if (!port_fanin.empty() && !bool_or_default(ctx->settings, ctx->id("timing/ignoreLoops"), false)) {              for (auto fanin : port_fanin) {                  NetInfo *net = fanin.first->net;                  if (net != nullptr) { @@ -263,8 +267,7 @@ struct Timing                      auto net_delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();                      auto usr_arrival = net_arrival + net_delay; -                    if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT || portClass == TMG_IGNORE || -                        portClass == TMG_CLOCK_INPUT) { +                    if (portClass == TMG_ENDPOINT || portClass == TMG_IGNORE || portClass == TMG_CLOCK_INPUT) {                          // Skip                      } else {                          auto budget_override = ctx->getBudgetOverride(net, usr, net_delay); @@ -410,7 +413,6 @@ struct Timing                  while (crit_net) {                      const PortInfo *crit_ipin = nullptr;                      delay_t max_arrival = std::numeric_limits<delay_t>::min(); -                      // Look at all input ports on its driving cell                      for (const auto &port : crit_net->driver.cell->ports) {                          if (port.second.type != PORT_IN || !port.second.net) @@ -424,14 +426,21 @@ struct Timing                          int port_clocks;                          TimingPortClass portClass =                                  ctx->getPortTimingClass(crit_net->driver.cell, port.first, port_clocks); -                        if (portClass == TMG_REGISTER_INPUT || portClass == TMG_CLOCK_INPUT || -                            portClass == TMG_ENDPOINT || portClass == TMG_IGNORE) +                        if (portClass == TMG_CLOCK_INPUT || portClass == TMG_ENDPOINT || portClass == TMG_IGNORE || +                            portClass == TMG_REGISTER_INPUT)                              continue; -                          // And find the fanin net with the latest arrival time                          if (net_data.count(port.second.net) &&                              net_data.at(port.second.net).count(crit_pair.first.start)) { -                            const auto net_arrival = net_data.at(port.second.net).at(crit_pair.first.start).max_arrival; +                            auto net_arrival = net_data.at(port.second.net).at(crit_pair.first.start).max_arrival; +                            if (net_delays) { +                                for (auto &user : port.second.net->users) +                                    if (user.port == port.first && user.cell == crit_net->driver.cell) { +                                        net_arrival += ctx->getNetinfoRouteDelay(port.second.net, user); +                                        break; +                                    } +                            } +                            net_arrival += comb_delay.maxDelay();                              if (net_arrival > max_arrival) {                                  max_arrival = net_arrival;                                  crit_ipin = &port.second; @@ -441,7 +450,6 @@ struct Timing                      if (!crit_ipin)                          break; -                      // Now convert PortInfo* into a PortRef*                      for (auto &usr : crit_ipin->net->users) {                          if (usr.cell->name == crit_net->driver.cell->name && usr.port == crit_ipin->name) { @@ -454,6 +462,181 @@ struct Timing                  std::reverse(cp_ports.begin(), cp_ports.end());              }          } + +        if (net_crit) { +            NPNR_ASSERT(crit_path); +            // Go through in reverse topographical order to set required times +            for (auto net : boost::adaptors::reverse(topographical_order)) { +                if (!net_data.count(net)) +                    continue; +                auto &nd_map = net_data.at(net); +                for (auto &startdomain : nd_map) { +                    auto &nd = startdomain.second; +                    if (nd.false_startpoint) +                        continue; +                    if (startdomain.first.clock == async_clock) +                        continue; +                    if (nd.min_required.empty()) +                        nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max()); +                    delay_t net_min_required = std::numeric_limits<delay_t>::max(); +                    for (size_t i = 0; i < net->users.size(); i++) { +                        auto &usr = net->users.at(i); +                        auto net_delay = ctx->getNetinfoRouteDelay(net, usr); +                        int port_clocks; +                        TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks); +                        if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) { +                            auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) { +                                delay_t period; +                                // Set default period +                                if (edge == startdomain.first.edge) { +                                    period = clk_period; +                                } else { +                                    period = clk_period / 2; +                                } +                                if (clksig != async_clock) { +                                    if (ctx->nets.at(clksig)->clkconstr) { +                                        if (edge == startdomain.first.edge) { +                                            // same edge +                                            period = ctx->nets.at(clksig)->clkconstr->period.minDelay(); +                                        } else if (edge == RISING_EDGE) { +                                            // falling -> rising +                                            period = ctx->nets.at(clksig)->clkconstr->low.minDelay(); +                                        } else if (edge == FALLING_EDGE) { +                                            // rising -> falling +                                            period = ctx->nets.at(clksig)->clkconstr->high.minDelay(); +                                        } +                                    } +                                } +                                nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i)); +                            }; +                            if (portClass == TMG_REGISTER_INPUT) { +                                for (int j = 0; j < port_clocks; j++) { +                                    TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j); +                                    const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port); +                                    IdString clksig = clknet ? clknet->name : async_clock; +                                    process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE, +                                                     clkInfo.setup.maxDelay()); +                                } +                            } else { +                                process_endpoint(async_clock, RISING_EDGE, 0); +                            } +                        } +                        net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay); +                    } +                    PortRef &drv = net->driver; +                    if (drv.cell == nullptr) +                        continue; +                    for (const auto &port : drv.cell->ports) { +                        if (port.second.type != PORT_IN || !port.second.net) +                            continue; +                        DelayInfo comb_delay; +                        bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay); +                        if (!is_path) +                            continue; +                        int cc; +                        auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc); +                        if (pclass != TMG_COMB_INPUT) +                            continue; +                        NetInfo *sink_net = port.second.net; +                        if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) { +                            auto &sink_nd = net_data.at(sink_net).at(startdomain.first); +                            if (sink_nd.min_required.empty()) +                                sink_nd.min_required.resize(sink_net->users.size(), +                                                            std::numeric_limits<delay_t>::max()); +                            for (size_t i = 0; i < sink_net->users.size(); i++) { +                                auto &user = sink_net->users.at(i); +                                if (user.cell == drv.cell && user.port == port.first) { +                                    sink_nd.min_required.at(i) = net_min_required - comb_delay.maxDelay(); +                                    break; +                                } +                            } +                        } +                    } +                } +            } +            std::unordered_map<ClockEvent, delay_t> worst_slack; + +            // Assign slack values +            for (auto &net_entry : net_data) { +                const NetInfo *net = net_entry.first; +                for (auto &startdomain : net_entry.second) { +                    auto &nd = startdomain.second; +                    if (startdomain.first.clock == async_clock) +                        continue; +                    if (nd.min_required.empty()) +                        continue; +                    auto &nc = (*net_crit)[net->name]; +                    if (nc.slack.empty()) +                        nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max()); +#if 0 +                    if (ctx->debug) +                        log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx)); +#endif +                    for (size_t i = 0; i < net->users.size(); i++) { +                        delay_t slack = nd.min_required.at(i) - +                                        (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i))); +#if 0 +                        if (ctx->debug) +                            log_info("    user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n", +                                    net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx), +                                    ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival), +                                    ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack)); +#endif +                        if (worst_slack.count(startdomain.first)) +                            worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack); +                        else +                            worst_slack[startdomain.first] = slack; +                        nc.slack.at(i) = slack; +                    } +                    if (ctx->debug) +                        log_break(); +                } +            } +            // Assign criticality values +            for (auto &net_entry : net_data) { +                const NetInfo *net = net_entry.first; +                for (auto &startdomain : net_entry.second) { +                    if (startdomain.first.clock == async_clock) +                        continue; +                    auto &nd = startdomain.second; +                    if (nd.min_required.empty()) +                        continue; +                    auto &nc = (*net_crit)[net->name]; +                    if (nc.slack.empty()) +                        continue; +                    if (nc.criticality.empty()) +                        nc.criticality.resize(net->users.size(), 0); +                    // Only consider intra-clock paths for criticality +                    if (!crit_path->count(ClockPair{startdomain.first, startdomain.first})) +                        continue; +                    delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; +                    for (size_t i = 0; i < net->users.size(); i++) { +                        float criticality = +                                1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); +                        nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality)); +                    } +                    nc.max_path_length = nd.max_path_length; +                    nc.cd_worst_slack = worst_slack.at(startdomain.first); +                } +            } +#if 0 +            if (ctx->debug) { +                for (auto &nc : *net_crit) { +                    NetInfo *net = ctx->nets.at(nc.first).get(); +                    log_info("Net %s maxlen %d worst_slack %.02fns: \n", nc.first.c_str(ctx), nc.second.max_path_length, +                             ctx->getDelayNS(nc.second.cd_worst_slack)); +                    if (!nc.second.criticality.empty() && !nc.second.slack.empty()) { +                        for (size_t i = 0; i < net->users.size(); i++) { +                            log_info("   user %s.%s slack %.02fns crit %.03f\n", net->users.at(i).cell->name.c_str(ctx), +                                     net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nc.second.slack.at(i)), +                                     nc.second.criticality.at(i)); +                        } +                    } +                    log_break(); +                } +            } +#endif +        }          return min_slack;      } @@ -601,6 +784,7 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p              int port_clocks;              auto portClass = ctx->getPortTimingClass(front_driver.cell, front_driver.port, port_clocks);              IdString last_port = front_driver.port; +            int clock_start = -1;              if (portClass == TMG_REGISTER_OUTPUT) {                  for (int i = 0; i < port_clocks; i++) {                      TimingClockingInfo clockInfo = ctx->getPortClockingInfo(front_driver.cell, front_driver.port, i); @@ -608,8 +792,7 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p                      if (clknet != nullptr && clknet->name == clocks.start.clock &&                          clockInfo.edge == clocks.start.edge) {                          last_port = clockInfo.clock_port; -                        total += clockInfo.clockToQ.maxDelay(); -                        logic_total += clockInfo.clockToQ.maxDelay(); +                        clock_start = i;                          break;                      }                  } @@ -623,11 +806,15 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p                  auto &driver = net->driver;                  auto driver_cell = driver.cell;                  DelayInfo comb_delay; -                if (last_port == driver.port) { +                if (clock_start != -1) { +                    auto clockInfo = ctx->getPortClockingInfo(driver_cell, driver.port, clock_start); +                    comb_delay = clockInfo.clockToQ; +                    clock_start = -1; +                } else if (last_port == driver.port) {                      // Case where we start with a STARTPOINT etc                      comb_delay = ctx->getDelayFromNS(0);                  } else { -                    ctx->getCellDelay(sink_cell, last_port, driver.port, comb_delay); +                    ctx->getCellDelay(driver_cell, last_port, driver.port, comb_delay);                  }                  total += comb_delay.maxDelay();                  logic_total += comb_delay.maxDelay(); @@ -651,6 +838,10 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p                      auto cursor = sink_wire;                      delay_t delay;                      while (driver_wire != cursor) { +#ifdef ARCH_ECP5 +                        if (net->is_global) +                            break; +#endif                          auto it = net->wires.find(cursor);                          assert(it != net->wires.end());                          auto pip = it->second.pip; @@ -713,6 +904,9 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p              if (!warn_on_failure || passed)                  log_info("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "",                           clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); +            else if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false)) +                log_warning("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", +                            clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target);              else                  log_nonfatal_error("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "",                                     clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); @@ -744,8 +938,7 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p          unsigned bar_width = 60;          auto min_slack = slack_histogram.begin()->first;          auto max_slack = slack_histogram.rbegin()->first; -        auto bin_size = std::max(1u, (max_slack - min_slack) / num_bins); -        num_bins = std::min((max_slack - min_slack) / bin_size, num_bins) + 1; +        auto bin_size = std::max<unsigned>(1, ceil((max_slack - min_slack + 1) / float(num_bins)));          std::vector<unsigned> bins(num_bins);          unsigned max_freq = 0;          for (const auto &i : slack_histogram) { @@ -766,4 +959,12 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p      }  } +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit) +{ +    CriticalPathMap crit_paths; +    net_crit->clear(); +    Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit); +    timing.walk_paths(); +} +  NEXTPNR_NAMESPACE_END diff --git a/common/timing.h b/common/timing.h index 42f928dc..f1d18e8a 100644 --- a/common/timing.h +++ b/common/timing.h @@ -32,6 +32,19 @@ void assign_budget(Context *ctx, bool quiet = false);  void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false,                       bool warn_on_failure = false); +// Data for the timing optimisation algorithm +struct NetCriticalityInfo +{ +    // One each per user +    std::vector<delay_t> slack; +    std::vector<float> criticality; +    unsigned max_path_length = 0; +    delay_t cd_worst_slack = std::numeric_limits<delay_t>::max(); +}; + +typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap; +void get_criticalities(Context *ctx, NetCriticalityMap *net_crit); +  NEXTPNR_NAMESPACE_END  #endif diff --git a/common/timing_opt.cc b/common/timing_opt.cc new file mode 100644 index 00000000..898222ab --- /dev/null +++ b/common/timing_opt.cc @@ -0,0 +1,626 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2018  David Shah <david@symbioticeda.com> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells + * on a critical path + * + * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs" + * https://www.cerc.utexas.edu/utda/publications/C205.pdf + * + * Modifications made to deal with the smaller Bels that nextpnr uses instead of swapping whole tiles, + * and deal with the fact that not every cell on the crit path may be swappable. + */ + +#include "timing_opt.h" +#include <boost/range/adaptor/reversed.hpp> +#include <queue> +#include "nextpnr.h" +#include "timing.h" +#include "util.h" + +namespace std { + +template <> struct hash<std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX IdString>> +{ +    std::size_t +    operator()(const std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX IdString> &idp) const +            noexcept +    { +        std::size_t seed = 0; +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.first)); +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.second)); +        return seed; +    } +}; + +template <> struct hash<std::pair<int, NEXTPNR_NAMESPACE_PREFIX BelId>> +{ +    std::size_t operator()(const std::pair<int, NEXTPNR_NAMESPACE_PREFIX BelId> &idp) const noexcept +    { +        std::size_t seed = 0; +        boost::hash_combine(seed, hash<int>()(idp.first)); +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX BelId>()(idp.second)); +        return seed; +    } +}; +#ifndef ARCH_GENERIC +template <> struct hash<std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX BelId>> +{ +    std::size_t +    operator()(const std::pair<NEXTPNR_NAMESPACE_PREFIX IdString, NEXTPNR_NAMESPACE_PREFIX BelId> &idp) const noexcept +    { +        std::size_t seed = 0; +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(idp.first)); +        boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX BelId>()(idp.second)); +        return seed; +    } +}; +#endif +} // namespace std + +NEXTPNR_NAMESPACE_BEGIN + +class TimingOptimiser +{ +  public: +    TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){}; +    bool optimise() +    { +        log_info("Running timing-driven placement optimisation...\n"); +        ctx->lock(); +        if (ctx->verbose) +            timing_analysis(ctx, false, true, false, false); +        for (int i = 0; i < 30; i++) { +            log_info("   Iteration %d...\n", i); +            get_criticalities(ctx, &net_crit); +            setup_delay_limits(); +            auto crit_paths = find_crit_paths(0.98, 50000); +            for (auto &path : crit_paths) +                optimise_path(path); +            if (ctx->verbose) +                timing_analysis(ctx, false, true, false, false); +        } +        ctx->unlock(); +        return true; +    } + +  private: +    void setup_delay_limits() +    { +        max_net_delay.clear(); +        for (auto net : sorted(ctx->nets)) { +            NetInfo *ni = net.second; +            for (auto usr : ni->users) { +                max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max(); +            } +            if (!net_crit.count(net.first)) +                continue; +            auto &nc = net_crit.at(net.first); +            if (nc.slack.empty()) +                continue; +            for (size_t i = 0; i < ni->users.size(); i++) { +                auto &usr = ni->users.at(i); +                delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); +                if (nc.max_path_length != 0) { +                    max_net_delay[std::make_pair(usr.cell->name, usr.port)] = +                            net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10); +                } +            } +        } +    } + +    bool check_cell_delay_limits(CellInfo *cell) +    { +        for (const auto &port : cell->ports) { +            int nc; +            if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE) +                continue; +            NetInfo *net = port.second.net; +            if (net == nullptr) +                continue; +            if (port.second.type == PORT_IN) { +                if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) +                    continue; +                for (auto user : net->users) { +                    if (user.cell == cell && user.port == port.first) { +                        if (ctx->predictDelay(net, user) > +                            1.1 * max_net_delay.at(std::make_pair(cell->name, port.first))) +                            return false; +                    } +                } + +            } else if (port.second.type == PORT_OUT) { +                for (auto user : net->users) { +                    // This could get expensive for high-fanout nets?? +                    BelId dstBel = user.cell->bel; +                    if (dstBel == BelId()) +                        continue; +                    if (ctx->predictDelay(net, user) > +                        1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) { + +                        return false; +                    } +                } +            } +        } +        return true; +    } + +    BelId cell_swap_bel(CellInfo *cell, BelId newBel) +    { +        BelId oldBel = cell->bel; +        if (oldBel == newBel) +            return oldBel; +        CellInfo *other_cell = ctx->getBoundBelCell(newBel); +        NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); +        ctx->unbindBel(oldBel); +        if (other_cell != nullptr) { +            ctx->unbindBel(newBel); +            ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); +        } +        ctx->bindBel(newBel, cell, STRENGTH_WEAK); +        return oldBel; +    } + +    // Check that a series of moves are both legal and remain within maximum delay bounds +    // Moves are specified as a vector of pairs <cell, oldBel> +    bool acceptable_move(std::vector<std::pair<CellInfo *, BelId>> &move, bool check_delays = true) +    { +        for (auto &entry : move) { +            if (!ctx->isBelLocationValid(entry.first->bel)) +                return false; +            if (!ctx->isBelLocationValid(entry.second)) +                return false; +            if (!check_delays) +                continue; +            if (!check_cell_delay_limits(entry.first)) +                return false; +            // We might have swapped another cell onto the original bel. Check this for max delay violations +            // too +            CellInfo *swapped = ctx->getBoundBelCell(entry.second); +            if (swapped != nullptr && !check_cell_delay_limits(swapped)) +                return false; +        } +        return true; +    } + +    int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) +    { +        BelId curr = cell->bel; +        Loc curr_loc = ctx->getBelLocation(curr); +        int found_count = 0; +        cell_neighbour_bels[cell->name] = std::unordered_set<BelId>{}; +        for (int dy = -d; dy <= d; dy++) { +            for (int dx = -d; dx <= d; dx++) { +                // Go through all the Bels at this location +                // First, find all bels of the correct type that are either unbound or bound normally +                // Strongly bound bels are ignored +                // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros +                std::vector<BelId> free_bels_at_loc; +                std::vector<BelId> bound_bels_at_loc; +                for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) { +                    if (ctx->getBelType(bel) != cell->type) +                        continue; +                    CellInfo *bound = ctx->getBoundBelCell(bel); +                    if (bound == nullptr) { +                        free_bels_at_loc.push_back(bel); +                    } else if (bound->belStrength <= STRENGTH_WEAK && bound->constr_parent == nullptr && +                               bound->constr_children.empty()) { +                        bound_bels_at_loc.push_back(bel); +                    } +                } +                BelId candidate; + +                while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) { +                    BelId try_bel; +                    if (!free_bels_at_loc.empty()) { +                        int try_idx = ctx->rng(int(free_bels_at_loc.size())); +                        try_bel = free_bels_at_loc.at(try_idx); +                        free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx); +                    } else { +                        int try_idx = ctx->rng(int(bound_bels_at_loc.size())); +                        try_bel = bound_bels_at_loc.at(try_idx); +                        bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx); +                    } +                    if (bel_candidate_cells.count(try_bel) && !allow_swap) { +                        // Overlap is only allowed if it is with the previous cell (this is handled by removing those +                        // edges in the graph), or if allow_swap is true to deal with cases where overlap means few +                        // neighbours are identified +                        if (bel_candidate_cells.at(try_bel).size() > 1 || +                            (bel_candidate_cells.at(try_bel).size() == 1 && +                             *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) +                            continue; +                    } +                    // TODO: what else to check here? +                    candidate = try_bel; +                    break; +                } + +                if (candidate != BelId()) { +                    cell_neighbour_bels[cell->name].insert(candidate); +                    bel_candidate_cells[candidate].insert(cell->name); +                    // Work out if we need to delete any overlap +                    std::vector<IdString> overlap; +                    for (auto other : bel_candidate_cells[candidate]) +                        if (other != cell->name && other != prev_cell) +                            overlap.push_back(other); +                    if (overlap.size() > 0) +                        NPNR_ASSERT(allow_swap); +                    for (auto ov : overlap) { +                        bel_candidate_cells[candidate].erase(ov); +                        cell_neighbour_bels[ov].erase(candidate); +                    } +                } +            } +        } +        return found_count; +    } + +    std::vector<std::vector<PortRef *>> find_crit_paths(float crit_thresh, size_t max_count) +    { +        std::vector<std::vector<PortRef *>> crit_paths; +        std::vector<std::pair<NetInfo *, int>> crit_nets; +        std::vector<IdString> netnames; +        std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), +                       [](const std::pair<const IdString, std::unique_ptr<NetInfo>> &kv) { return kv.first; }); +        ctx->sorted_shuffle(netnames); +        for (auto net : netnames) { +            if (crit_nets.size() >= max_count) +                break; +            if (!net_crit.count(net)) +                continue; +            auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end()); +            if (*crit_user > crit_thresh) +                crit_nets.push_back( +                        std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); +        } + +        auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { +            NPNR_ASSERT(port.net != nullptr); +            for (size_t i = 0; i < port.net->users.size(); i++) { +                auto &usr = port.net->users.at(i); +                if (usr.cell == cell && usr.port == port.name) +                    return i; +            } +            NPNR_ASSERT_FALSE("port user not found on net"); +        }; +        std::unordered_set<PortRef *> used_ports; + +        for (auto crit_net : crit_nets) { + +            if (used_ports.count(&(crit_net.first->users.at(crit_net.second)))) +                continue; + +            std::deque<PortRef *> crit_path; + +            // FIXME: This will fail badly on combinational loops + +            // Iterate backwards following greatest criticality +            NetInfo *back_cursor = crit_net.first; +            while (back_cursor != nullptr) { +                float max_crit = 0; +                std::pair<NetInfo *, size_t> crit_sink{nullptr, 0}; +                CellInfo *cell = back_cursor->driver.cell; +                if (cell == nullptr) +                    break; +                for (auto port : cell->ports) { +                    if (port.second.type != PORT_IN) +                        continue; +                    NetInfo *pn = port.second.net; +                    if (pn == nullptr) +                        continue; +                    if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) +                        continue; +                    int ccount; +                    DelayInfo combDelay; +                    TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); +                    if (tpclass != TMG_COMB_INPUT) +                        continue; +                    bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); +                    if (!is_path) +                        continue; +                    size_t user_idx = port_user_index(cell, port.second); +                    float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); +                    if (used_ports.count(&(pn->users.at(user_idx)))) +                        continue; +                    if (usr_crit >= max_crit) { +                        max_crit = usr_crit; +                        crit_sink = std::make_pair(pn, user_idx); +                    } +                } + +                if (crit_sink.first != nullptr) { +                    crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second))); +                    used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); +                } +                back_cursor = crit_sink.first; +            } +            // Iterate forwards following greatest criticiality +            PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second)); +            while (fwd_cursor != nullptr) { +                crit_path.push_back(fwd_cursor); +                float max_crit = 0; +                std::pair<NetInfo *, size_t> crit_sink{nullptr, 0}; +                CellInfo *cell = fwd_cursor->cell; +                for (auto port : cell->ports) { +                    if (port.second.type != PORT_OUT) +                        continue; +                    NetInfo *pn = port.second.net; +                    if (pn == nullptr) +                        continue; +                    if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty()) +                        continue; +                    int ccount; +                    DelayInfo combDelay; +                    TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); +                    if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) +                        continue; +                    bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); +                    if (!is_path) +                        continue; +                    auto &crits = net_crit.at(pn->name).criticality; +                    for (size_t i = 0; i < crits.size(); i++) { +                        if (used_ports.count(&(pn->users.at(i)))) +                            continue; +                        if (crits.at(i) >= max_crit) { +                            max_crit = crits.at(i); +                            crit_sink = std::make_pair(pn, i); +                        } +                    } +                } +                if (crit_sink.first != nullptr) { +                    fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); +                    used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); +                } else { +                    fwd_cursor = nullptr; +                } +            } + +            std::vector<PortRef *> crit_path_vec; +            std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec)); +            crit_paths.push_back(crit_path_vec); +        } + +        return crit_paths; +    } + +    void optimise_path(std::vector<PortRef *> &path) +    { +        path_cells.clear(); +        cell_neighbour_bels.clear(); +        bel_candidate_cells.clear(); +        if (ctx->debug) +            log_info("Optimising the following path: \n"); + +        auto front_port = path.front(); +        NetInfo *front_net = front_port->cell->ports.at(front_port->port).net; +        if (front_net != nullptr && front_net->driver.cell != nullptr) { +            auto front_cell = front_net->driver.cell; +            if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) && +                front_cell->constr_parent == nullptr && front_cell->constr_children.empty()) { +                path_cells.push_back(front_cell->name); +            } +        } + +        for (auto port : path) { +            if (ctx->debug) { +                float crit = 0; +                NetInfo *pn = port->cell->ports.at(port->port).net; +                if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty()) +                    for (size_t i = 0; i < pn->users.size(); i++) +                        if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port) +                            crit = net_crit.at(pn->name).criticality.at(i); +                log_info("    %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), +                         ctx->getBelName(port->cell->bel).c_str(ctx), crit); +            } +            if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) +                continue; +            if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || +                port->cell->constr_parent != nullptr || !port->cell->constr_children.empty()) +                continue; +            if (ctx->debug) +                log_info("        can move\n"); +            path_cells.push_back(port->cell->name); +        } + +        if (path_cells.size() < 2) { +            if (ctx->debug) { +                log_info("Too few moveable cells; skipping path\n"); +                log_break(); +            } + +            return; +        } + +        // Calculate original delay before touching anything +        delay_t original_delay = 0; + +        for (size_t i = 0; i < path.size(); i++) { +            NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; +            for (size_t j = 0; j < pn->users.size(); j++) { +                auto &usr = pn->users.at(j); +                if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { +                    original_delay += ctx->predictDelay(pn, usr); +                    break; +                } +            } +        } + +        IdString last_cell; +        const int d = 2; // FIXME: how to best determine d +        for (auto cell : path_cells) { +            // FIXME: when should we allow swapping due to a lack of candidates +            find_neighbours(ctx->cells[cell].get(), last_cell, d, false); +            last_cell = cell; +        } + +        if (ctx->debug) { +            for (auto cell : path_cells) { +                log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), +                         ctx->getBelName(ctx->cells[cell]->bel).c_str(ctx)); +                for (auto neigh : cell_neighbour_bels.at(cell)) { +                    log_info("    %s\n", ctx->getBelName(neigh).c_str(ctx)); +                } +            } +        } + +        // Actual BFS path optimisation algorithm +        std::unordered_map<IdString, std::unordered_map<BelId, delay_t>> cumul_costs; +        std::unordered_map<std::pair<IdString, BelId>, std::pair<IdString, BelId>> backtrace; +        std::queue<std::pair<int, BelId>> visit; +        std::unordered_set<std::pair<int, BelId>> to_visit; + +        for (auto startbel : cell_neighbour_bels[path_cells.front()]) { +            // Swap for legality check +            CellInfo *cell = ctx->cells.at(path_cells.front()).get(); +            BelId origBel = cell_swap_bel(cell, startbel); +            std::vector<std::pair<CellInfo *, BelId>> move{std::make_pair(cell, origBel)}; +            if (acceptable_move(move)) { +                auto entry = std::make_pair(0, startbel); +                visit.push(entry); +                cumul_costs[path_cells.front()][startbel] = 0; +            } +            // Swap back +            cell_swap_bel(cell, origBel); +        } + +        while (!visit.empty()) { +            auto entry = visit.front(); +            visit.pop(); +            auto cellname = path_cells.at(entry.first); +            if (entry.first == int(path_cells.size()) - 1) +                continue; +            std::vector<std::pair<CellInfo *, BelId>> move; +            // Apply the entire backtrace for accurate legality and delay checks +            // This is probably pretty expensive (but also probably pales in comparison to the number of swaps +            // SA will make...) +            std::vector<std::pair<IdString, BelId>> route_to_entry; +            auto cursor = std::make_pair(cellname, entry.second); +            route_to_entry.push_back(cursor); +            while (backtrace.count(cursor)) { +                cursor = backtrace.at(cursor); +                route_to_entry.push_back(cursor); +            } +            for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) { +                CellInfo *cell = ctx->cells.at(rt_entry.first).get(); +                BelId origBel = cell_swap_bel(cell, rt_entry.second); +                move.push_back(std::make_pair(cell, origBel)); +            } + +            // Have a look at where we can travel from here +            for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) { +                // Edges between overlapping bels are deleted +                if (neighbour == entry.second) +                    continue; +                // Experimentally swap the next path cell onto the neighbour bel we are trying +                IdString ncname = path_cells.at(entry.first + 1); +                CellInfo *next_cell = ctx->cells.at(ncname).get(); +                BelId origBel = cell_swap_bel(next_cell, neighbour); +                move.push_back(std::make_pair(next_cell, origBel)); + +                delay_t total_delay = 0; + +                for (size_t i = 0; i < path.size(); i++) { +                    NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; +                    for (size_t j = 0; j < pn->users.size(); j++) { +                        auto &usr = pn->users.at(j); +                        if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { +                            total_delay += ctx->predictDelay(pn, usr); +                            break; +                        } +                    } +                    if (path.at(i)->cell == next_cell) +                        break; +                } + +                // First, check if the move is actually worthwhile from a delay point of view before the expensive +                // legality check +                if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) || +                    cumul_costs.at(ncname).at(neighbour) > total_delay) { +                    // Now check that the swaps we have made to get here are legal and meet max delay requirements +                    if (acceptable_move(move)) { +                        cumul_costs[ncname][neighbour] = total_delay; +                        backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second); +                        if (!to_visit.count(std::make_pair(entry.first + 1, neighbour))) +                            visit.push(std::make_pair(entry.first + 1, neighbour)); +                    } +                } +                // Revert the experimental swap +                cell_swap_bel(move.back().first, move.back().second); +                move.pop_back(); +            } + +            // Revert move by swapping cells back to their original order +            // Execute swaps in reverse order to how we made them originally +            for (auto move_entry : boost::adaptors::reverse(move)) { +                cell_swap_bel(move_entry.first, move_entry.second); +            } +        } + +        // Did we find a solution?? +        if (cumul_costs.count(path_cells.back())) { +            // Find the end position with the lowest total delay +            auto &end_options = cumul_costs.at(path_cells.back()); +            auto lowest = std::min_element(end_options.begin(), end_options.end(), +                                           [](const std::pair<BelId, delay_t> &a, const std::pair<BelId, delay_t> &b) { +                                               return a.second < b.second; +                                           }); +            NPNR_ASSERT(lowest != end_options.end()); + +            std::vector<std::pair<IdString, BelId>> route_to_solution; +            auto cursor = std::make_pair(path_cells.back(), lowest->first); +            route_to_solution.push_back(cursor); +            while (backtrace.count(cursor)) { +                cursor = backtrace.at(cursor); +                route_to_solution.push_back(cursor); +            } +            if (ctx->debug) +                log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", +                         ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay)); +            for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { +                CellInfo *cell = ctx->cells.at(rt_entry.first).get(); +                cell_swap_bel(cell, rt_entry.second); +                if (ctx->debug) +                    log_info("    %s at %s\n", rt_entry.first.c_str(ctx), ctx->getBelName(rt_entry.second).c_str(ctx)); +            } + +        } else { +            if (ctx->debug) +                log_info("Solution was not found\n"); +        } +        if (ctx->debug) +            log_break(); +    } + +    // Current candidate Bels for cells (linked in both direction> +    std::vector<IdString> path_cells; +    std::unordered_map<IdString, std::unordered_set<BelId>> cell_neighbour_bels; +    std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells; +    // Map cell ports to net delay limit +    std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay; +    // Criticality data from timing analysis +    NetCriticalityMap net_crit; +    Context *ctx; +    TimingOptCfg cfg; +}; + +bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } + +NEXTPNR_NAMESPACE_END diff --git a/common/timing_opt.h b/common/timing_opt.h new file mode 100644 index 00000000..ceb35c71 --- /dev/null +++ b/common/timing_opt.h @@ -0,0 +1,37 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2018  David Shah <david@symbioticeda.com> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "nextpnr.h" +#include "settings.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct TimingOptCfg : public Settings +{ +    TimingOptCfg(Context *ctx) : Settings(ctx) {} + +    // The timing optimiser will *only* optimise cells of these types +    // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense +    // for other cell types +    std::unordered_set<IdString> cellTypes; +}; + +extern bool timing_opt(Context *ctx, TimingOptCfg cfg); + +NEXTPNR_NAMESPACE_END | 
