From 49f178ed94b5fad00d25dbd12adea0bf4732f803 Mon Sep 17 00:00:00 2001 From: gatecat Date: Fri, 8 Apr 2022 13:42:54 +0100 Subject: Split up common into kernel,place,route Signed-off-by: gatecat --- common/place/fast_bels.h | 188 ++++ common/place/parallel_refine.cc | 959 ++++++++++++++++++++ common/place/parallel_refine.h | 43 + common/place/place_common.cc | 487 +++++++++++ common/place/place_common.h | 55 ++ common/place/placer1.cc | 1317 ++++++++++++++++++++++++++++ common/place/placer1.h | 45 + common/place/placer_heap.cc | 1830 +++++++++++++++++++++++++++++++++++++++ common/place/placer_heap.h | 58 ++ common/place/timing_opt.cc | 561 ++++++++++++ common/place/timing_opt.h | 37 + 11 files changed, 5580 insertions(+) create mode 100644 common/place/fast_bels.h create mode 100644 common/place/parallel_refine.cc create mode 100644 common/place/parallel_refine.h create mode 100644 common/place/place_common.cc create mode 100644 common/place/place_common.h create mode 100644 common/place/placer1.cc create mode 100644 common/place/placer1.h create mode 100644 common/place/placer_heap.cc create mode 100644 common/place/placer_heap.h create mode 100644 common/place/timing_opt.cc create mode 100644 common/place/timing_opt.h (limited to 'common/place') diff --git a/common/place/fast_bels.h b/common/place/fast_bels.h new file mode 100644 index 00000000..ba9938c6 --- /dev/null +++ b/common/place/fast_bels.h @@ -0,0 +1,188 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 Claire Xenia Wolf + * Copyright (C) 2018 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#pragma once + +#include +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +// FastBels is a lookup class that provides a fast lookup for finding BELs +// that support a given cell type. +struct FastBels +{ + struct TypeData + { + size_t type_index; + int number_of_possible_bels; + }; + + FastBels(Context *ctx, bool check_bel_available, int minBelsForGridPick) + : ctx(ctx), check_bel_available(check_bel_available), minBelsForGridPick(minBelsForGridPick) + { + } + + void addCellType(IdString cell_type) + { + auto iter = cell_types.find(cell_type); + if (iter != cell_types.end()) { + // This cell type has already been added to the fast BEL lookup. + return; + } + + size_t type_idx = cell_types.size(); + auto &cell_type_data = cell_types[cell_type]; + cell_type_data.type_index = type_idx; + + fast_bels_by_cell_type.resize(type_idx + 1); + auto &bel_data = fast_bels_by_cell_type.at(type_idx); + NPNR_ASSERT(bel_data.get() == nullptr); + bel_data = std::make_unique(); + + for (auto bel : ctx->getBels()) { + if (!ctx->isValidBelForCellType(cell_type, bel)) { + continue; + } + + cell_type_data.number_of_possible_bels += 1; + } + + for (auto bel : ctx->getBels()) { + if (check_bel_available && !ctx->checkBelAvail(bel)) { + continue; + } + + if (!ctx->isValidBelForCellType(cell_type, bel)) { + continue; + } + + Loc loc = ctx->getBelLocation(bel); + if (minBelsForGridPick >= 0 && cell_type_data.number_of_possible_bels < minBelsForGridPick) { + loc.x = loc.y = 0; + } + + if (int(bel_data->size()) < (loc.x + 1)) { + bel_data->resize(loc.x + 1); + } + + if (int(bel_data->at(loc.x).size()) < (loc.y + 1)) { + bel_data->at(loc.x).resize(loc.y + 1); + } + + bel_data->at(loc.x).at(loc.y).push_back(bel); + } + } + + void addBelBucket(BelBucketId partition) + { + auto iter = partition_types.find(partition); + if (iter != partition_types.end()) { + // This partition has already been added to the fast BEL lookup. + return; + } + + size_t type_idx = partition_types.size(); + auto &type_data = partition_types[partition]; + type_data.type_index = type_idx; + + fast_bels_by_partition_type.resize(type_idx + 1); + auto &bel_data = fast_bels_by_partition_type.at(type_idx); + NPNR_ASSERT(bel_data.get() == nullptr); + bel_data = std::make_unique(); + + for (auto bel : ctx->getBels()) { + if (ctx->getBelBucketForBel(bel) != partition) { + continue; + } + + type_data.number_of_possible_bels += 1; + } + + for (auto bel : ctx->getBels()) { + if (check_bel_available && !ctx->checkBelAvail(bel)) { + continue; + } + + if (ctx->getBelBucketForBel(bel) != partition) { + continue; + } + + Loc loc = ctx->getBelLocation(bel); + if (minBelsForGridPick >= 0 && type_data.number_of_possible_bels < minBelsForGridPick) { + loc.x = loc.y = 0; + } + + if (int(bel_data->size()) < (loc.x + 1)) { + bel_data->resize(loc.x + 1); + } + + if (int(bel_data->at(loc.x).size()) < (loc.y + 1)) { + bel_data->at(loc.x).resize(loc.y + 1); + } + + bel_data->at(loc.x).at(loc.y).push_back(bel); + } + } + + typedef std::vector>> FastBelsData; + + int getBelsForCellType(IdString cell_type, FastBelsData **data) + { + auto iter = cell_types.find(cell_type); + if (iter == cell_types.end()) { + addCellType(cell_type); + iter = cell_types.find(cell_type); + NPNR_ASSERT(iter != cell_types.end()); + } + + auto cell_type_data = iter->second; + + *data = fast_bels_by_cell_type.at(cell_type_data.type_index).get(); + return cell_type_data.number_of_possible_bels; + } + + size_t getBelsForBelBucket(BelBucketId partition, FastBelsData **data) + { + auto iter = partition_types.find(partition); + if (iter == partition_types.end()) { + addBelBucket(partition); + iter = partition_types.find(partition); + NPNR_ASSERT(iter != partition_types.end()); + } + + auto type_data = iter->second; + + *data = fast_bels_by_partition_type.at(type_data.type_index).get(); + return type_data.number_of_possible_bels; + } + + Context *ctx; + const bool check_bel_available; + const int minBelsForGridPick; + + dict cell_types; + std::vector> fast_bels_by_cell_type; + + dict partition_types; + std::vector> fast_bels_by_partition_type; +}; + +NEXTPNR_NAMESPACE_END diff --git a/common/place/parallel_refine.cc b/common/place/parallel_refine.cc new file mode 100644 index 00000000..a868ca58 --- /dev/null +++ b/common/place/parallel_refine.cc @@ -0,0 +1,959 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021-22 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "parallel_refine.h" +#include "log.h" + +#if !defined(__wasm) + +#include "fast_bels.h" +#include "scope_lock.h" +#include "timing.h" + +#include +#include +#include +#include +#include + +NEXTPNR_NAMESPACE_BEGIN + +namespace { +struct Partition +{ + int x0, y0, x1, y1; + std::vector cells; + Partition() = default; + explicit Partition(Context *ctx) + { + x0 = ctx->getGridDimX(); + y0 = ctx->getGridDimY(); + x1 = 0; + y1 = 0; + for (auto &cell : ctx->cells) { + Loc l = ctx->getBelLocation(cell.second->bel); + x0 = std::min(x0, l.x); + x1 = std::max(x1, l.x); + y0 = std::min(y0, l.y); + y1 = std::max(y1, l.y); + cells.push_back(cell.second.get()); + } + } + void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r) + { + std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) { + Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel); + return yaxis ? (l0.y < l1.y) : (l0.x < l1.x); + }); + size_t pivot_point = size_t(cells.size() * pivot); + l.cells.clear(); + r.cells.clear(); + l.cells.reserve(pivot_point); + r.cells.reserve(cells.size() - pivot_point); + int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1) + : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y + : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x); + for (size_t i = 0; i < cells.size(); i++) { + Loc loc = ctx->getBelLocation(cells.at(i)->bel); + ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i)); + } + if (yaxis) { + l.x0 = r.x0 = x0; + l.x1 = r.x1 = x1; + l.y0 = y0; + l.y1 = pivot_coord; + r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1); + r.y1 = y1; + } else { + l.y0 = r.y0 = y0; + l.y1 = r.y1 = y1; + l.x0 = x0; + l.x1 = pivot_coord; + r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1); + r.x1 = x1; + } + } +}; + +typedef int64_t wirelen_t; + +struct NetBB +{ + // Actual bounding box + int x0 = 0, x1 = 0, y0 = 0, y1 = 0; + // Number of cells at each extremity + int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; + wirelen_t hpwl(const ParallelRefineCfg &cfg) const + { + return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); + } + static NetBB compute(const Context *ctx, const NetInfo *net, const dict *cell2bel = nullptr) + { + NetBB result{}; + if (!net->driver.cell) + return result; + auto bel_loc = [&](const CellInfo *cell) { + BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel; + return ctx->getBelLocation(bel); + }; + result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1; + Loc drv_loc = bel_loc(net->driver.cell); + result.x0 = result.x1 = drv_loc.x; + result.y0 = result.y1 = drv_loc.y; + for (auto &usr : net->users) { + Loc l = bel_loc(usr.cell); + if (l.x == result.x0) + ++result.nx0; // on the edge + else if (l.x < result.x0) { + result.x0 = l.x; // extends the edge + result.nx0 = 1; + } + if (l.x == result.x1) + ++result.nx1; // on the edge + else if (l.x > result.x1) { + result.x1 = l.x; // extends the edge + result.nx1 = 1; + } + if (l.y == result.y0) + ++result.ny0; // on the edge + else if (l.y < result.y0) { + result.y0 = l.y; // extends the edge + result.ny0 = 1; + } + if (l.y == result.y1) + ++result.ny1; // on the edge + else if (l.y > result.y1) { + result.y1 = l.y; // extends the edge + result.ny1 = 1; + } + } + return result; + } +}; + +struct GlobalState +{ + explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){}; + Context *ctx; + ParallelRefineCfg cfg; + FastBels bels; + std::vector flat_nets; // flat array of all nets in the design for fast referencing by index + std::vector last_bounds; + std::vector> last_tmg_costs; + dict region_bounds; + TimingAnalyser tmg; + + std::shared_timed_mutex archapi_mutex; + + double temperature = 1e-7; + int radius = 3; + + wirelen_t total_wirelen = 0; + double total_timing_cost = 0; + + double get_timing_cost(const NetInfo *net, store_index user, + const dict *cell2bel = nullptr) + { + if (!net->driver.cell) + return 0; + const auto &sink = net->users.at(user); + IdString driver_pin, sink_pin; + // Pick the first pin for a prediction; assume all will be similar enouhg + for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) { + driver_pin = pin; + break; + } + for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) { + sink_pin = pin; + break; + } + float crit = tmg.get_criticality(CellPortKey(sink)); + BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel; + BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel; + double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin)); + return delay * std::pow(crit, cfg.crit_exp); + } + + bool skip_net(const NetInfo *net) const + { + if (!net->driver.cell) + return true; + if (ctx->getBelGlobalBuf(net->driver.cell->bel)) + return true; + return false; + } + bool timing_skip_net(const NetInfo *net) const + { + if (!net->driver.cell) + return true; + int cc; + auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc); + if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK) + return true; + return false; + } + // .... +}; + +struct ThreadState +{ + Context *ctx; // Nextpnr context pointer + GlobalState &g; // Refinement engine state + int idx; // Index of the thread + DeterministicRNG rng; // Local RNG + // The cell partition that the thread works on + Partition p; + // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can + // optimise + std::vector thread_net_idx; + // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's + // perspective + std::vector thread_nets; + std::vector net_bounds; + std::vector> arc_tmg_cost; + std::vector ignored_nets, tmg_ignored_nets; + bool arch_state_dirty = false; + // Our local cell-bel map; that won't be affected by out-of-partition moves + dict local_cell2bel; + + // Data on an inflight move + dict> moved_cells; // cell -> (old; new) + // For cluster moves only + std::vector> cell_rel; + // For incremental wirelength and delay updates + wirelen_t wirelen_delta = 0; + double timing_delta = 0; + + // Total made and accepted moved + int n_move = 0, n_accept = 0; + + enum BoundChange + { + NO_CHANGE, + CELL_MOVED_INWARDS, + CELL_MOVED_OUTWARDS, + FULL_RECOMPUTE + }; + // Wirelen related are handled on a per-axis basis to reduce + struct AxisChanges + { + std::vector bounds_changed_nets; + std::vector already_bounds_changed; + }; + std::array axes; + std::vector new_net_bounds; + + std::vector> already_timing_changed; + std::vector>> timing_changed_arcs; + std::vector new_timing_costs; + + ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){}; + void set_partition(const Partition &part) + { + p = part; + thread_nets.clear(); + thread_net_idx.resize(g.flat_nets.size()); + std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1); + // Determine the set of nets that are within the thread; and therefore we care about + for (auto thread_cell : part.cells) { + for (auto &port : thread_cell->ports) { + if (!port.second.net) + continue; + int global_idx = port.second.net->udata; + auto &thread_idx = thread_net_idx.at(global_idx); + // Already added to the set + if (thread_idx != -1) + continue; + thread_idx = thread_nets.size(); + thread_nets.push_back(port.second.net); + } + } + tmg_ignored_nets.clear(); + ignored_nets.clear(); + for (auto tn : thread_nets) { + ignored_nets.push_back(g.skip_net(tn)); + tmg_ignored_nets.push_back(g.timing_skip_net(tn)); + } + // Set up the original cell-bel map for all nets inside the thread + local_cell2bel.clear(); + for (NetInfo *net : thread_nets) { + if (net->driver.cell) + local_cell2bel[net->driver.cell->name] = net->driver.cell->bel; + for (auto &usr : net->users) + local_cell2bel[usr.cell->name] = usr.cell->bel; + } + } + void setup_initial_state() + { + // Setup initial net bounding boxes and timing costs + net_bounds.clear(); + arc_tmg_cost.clear(); + for (auto tn : thread_nets) { + net_bounds.push_back(g.last_bounds.at(tn->udata)); + arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata)); + } + new_net_bounds = net_bounds; + for (int j = 0; j < 2; j++) { + auto &a = axes.at(j); + a.already_bounds_changed.resize(net_bounds.size()); + } + already_timing_changed.clear(); + already_timing_changed.resize(net_bounds.size()); + for (size_t i = 0; i < thread_nets.size(); i++) + already_timing_changed.at(i) = std::vector(thread_nets.at(i)->users.capacity()); + } + bool bounds_check(BelId bel) + { + Loc l = ctx->getBelLocation(bel); + if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1) + return false; + return true; + } + bool bind_move() + { + std::unique_lock l(g.archapi_mutex); + for (auto &entry : moved_cells) { + ctx->unbindBel(entry.second.first); + } + bool success = true; + for (auto &entry : moved_cells) { + // Make sure targets are available before we bind them + if (!ctx->checkBelAvail(entry.second.second)) { + success = false; + break; + } + ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); + } + arch_state_dirty = true; + return success; + } + bool check_validity() + { + std::shared_lock l(g.archapi_mutex); + bool result = true; + for (auto e : moved_cells) { + if (!ctx->isBelLocationValid(e.second.first)) { + // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer + // enabling dedicated routes to be used + result = false; + break; + } + if (!ctx->isBelLocationValid(e.second.second)) { + result = false; + break; + } + } + return result; + } + void revert_move() + { + if (arch_state_dirty) { + // If changes to the arch state were made, revert them by restoring original cell bindings + std::unique_lock l(g.archapi_mutex); + for (auto &entry : moved_cells) { + BelId curr_bound = ctx->cells.at(entry.first)->bel; + if (curr_bound != BelId()) + ctx->unbindBel(curr_bound); + } + for (auto &entry : moved_cells) { + ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); + } + arch_state_dirty = false; + } + for (auto &entry : moved_cells) + local_cell2bel[entry.first] = entry.second.first; + } + void commit_move() + { + arch_state_dirty = false; + for (auto &axis : axes) { + for (auto bc : axis.bounds_changed_nets) { + // Commit updated net bounds + net_bounds.at(bc) = new_net_bounds.at(bc); + } + } + if (g.cfg.timing_driven) { + NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size()); + for (size_t i = 0; i < timing_changed_arcs.size(); i++) { + auto arc = timing_changed_arcs.at(i); + arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i); + } + } + } + void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel) + { + Loc new_loc = ctx->getBelLocation(new_bel); + Loc old_loc = ctx->getBelLocation(old_bel); + for (const auto &port : cell->ports) { + NetInfo *pn = port.second.net; + if (!pn) + continue; + int idx = thread_net_idx.at(pn->udata); + if (ignored_nets.at(idx)) + continue; + NetBB &new_bounds = new_net_bounds.at(idx); + // For the x-axis (i=0) and y-axis (i=1) + for (int i = 0; i < 2; i++) { + auto &axis = axes.at(i); + // New and old on this axis + int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x; + // References to updated bounding box entries + auto &b0 = i ? new_bounds.y0 : new_bounds.x0; + auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0; + auto &b1 = i ? new_bounds.y1 : new_bounds.x1; + auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1; + auto &change = axis.already_bounds_changed.at(idx); + // Lower bound + if (new_pos < b0) { + // Further out than current lower bound + b0 = new_pos; + n0 = 1; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (new_pos == b0 && old_pos > b0) { + // Moved from inside into current bound + ++n0; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (old_pos == b0 && new_pos > b0) { + // Moved from current bound to inside + if (change == NO_CHANGE) + axis.bounds_changed_nets.push_back(idx); + if (n0 == 1) { + // Was the last cell on the bound; have to do a full recompute + change = FULL_RECOMPUTE; + } else { + --n0; + if (change == NO_CHANGE) + change = CELL_MOVED_INWARDS; + } + } + // Upper bound + if (new_pos > b1) { + // Further out than current upper bound + b1 = new_pos; + n1 = new_pos; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (new_pos == b1 && old_pos < b1) { + // Moved onto current bound + ++n1; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (old_pos == b1 && new_pos < b1) { + // Moved from current bound to inside + if (change == NO_CHANGE) + axis.bounds_changed_nets.push_back(idx); + if (n1 == 1) { + // Was the last cell on the bound; have to do a full recompute + change = FULL_RECOMPUTE; + } else { + --n1; + if (change == NO_CHANGE) + change = CELL_MOVED_INWARDS; + } + } + } + // Timing updates if timing driven + if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) { + if (port.second.type == PORT_OUT) { + int cc; + TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); + if (cls != TMG_IGNORE) { + for (auto usr : pn->users.enumerate()) + if (!already_timing_changed.at(idx).at(usr.index.idx())) { + timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index)); + already_timing_changed.at(idx).at(usr.index.idx()) = true; + } + } + } else { + auto usr = port.second.user_idx; + if (!already_timing_changed.at(idx).at(usr.idx())) { + timing_changed_arcs.emplace_back(std::make_pair(idx, usr)); + already_timing_changed.at(idx).at(usr.idx()) = true; + } + } + } + } + } + void compute_total_change() + { + auto &xa = axes.at(0), &ya = axes.at(1); + for (auto &bc : xa.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE) + new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); + for (auto &bc : ya.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && + ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE) + new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); + for (auto &bc : xa.bounds_changed_nets) + wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); + for (auto &bc : ya.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) == NO_CHANGE) + wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); + if (g.cfg.timing_driven) { + NPNR_ASSERT(new_timing_costs.empty()); + for (auto arc : timing_changed_arcs) { + double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel); + timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx())); + new_timing_costs.push_back(new_cost); + } + } + } + void reset_move_state() + { + moved_cells.clear(); + cell_rel.clear(); + for (auto &axis : axes) { + for (auto bc : axis.bounds_changed_nets) { + new_net_bounds.at(bc) = net_bounds.at(bc); + axis.already_bounds_changed[bc] = NO_CHANGE; + } + axis.bounds_changed_nets.clear(); + } + for (auto &arc : timing_changed_arcs) { + already_timing_changed.at(arc.first).at(arc.second.idx()) = false; + } + timing_changed_arcs.clear(); + new_timing_costs.clear(); + wirelen_delta = 0; + timing_delta = 0; + } + + bool accept_move() + { + static constexpr double epsilon = 1e-20; + double delta = g.cfg.lambda * (timing_delta / std::max(epsilon, g.total_timing_cost)) + + (1.0 - g.cfg.lambda) * (double(wirelen_delta) / std::max(epsilon, g.total_wirelen)); + return delta < 0 || + (g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature)); + } + + bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel) + { + if (!bounds_check(old_bel) || !bounds_check(new_bel)) + return false; + if (!ctx->isValidBelForCellType(cell->type, new_bel)) + return false; + NPNR_ASSERT(!moved_cells.count(cell->name)); + moved_cells[cell->name] = std::make_pair(old_bel, new_bel); + local_cell2bel[cell->name] = new_bel; + compute_changes_for_cell(cell, old_bel, new_bel); + return true; + } + + bool single_cell_swap(CellInfo *cell, BelId new_bel) + { + NPNR_ASSERT(moved_cells.empty()); + BelId old_bel = cell->bel; + CellInfo *bound = ctx->getBoundBelCell(new_bel); + if (bound && (bound->belStrength > STRENGTH_STRONG || bound->cluster != ClusterId())) + return false; + if (!add_to_move(cell, old_bel, new_bel)) + goto fail; + if (bound && !add_to_move(bound, new_bel, old_bel)) + goto fail; + compute_total_change(); + // SA acceptance criteria + + if (!accept_move()) { + // SA fail + goto fail; + } + // Check validity rules + if (!bind_move()) + goto fail; + if (!check_validity()) + goto fail; + // Accepted! + commit_move(); + reset_move_state(); + return true; + fail: + revert_move(); + reset_move_state(); + return false; + } + + bool chain_swap(CellInfo *root_cell, BelId new_root_bel) + { + NPNR_ASSERT(moved_cells.empty()); + std::queue> displaced_clusters; + pool used_bels; + displaced_clusters.emplace(root_cell->cluster, new_root_bel); + while (!displaced_clusters.empty()) { + std::vector> dest_bels; + auto cursor = displaced_clusters.front(); + displaced_clusters.pop(); + if (!ctx->getClusterPlacement(cursor.first, cursor.second, dest_bels)) + goto fail; + for (const auto &db : dest_bels) { + BelId old_bel = db.first->bel; + if (moved_cells.count(db.first->name)) + goto fail; + if (!add_to_move(db.first, old_bel, db.second)) + goto fail; + if (used_bels.count(db.second)) + goto fail; + used_bels.insert(db.second); + + CellInfo *bound = ctx->getBoundBelCell(db.second); + if (bound) { + if (moved_cells.count(bound->name)) { + // Don't move a cell multiple times in the same go + goto fail; + } else if (bound->belStrength > STRENGTH_STRONG) { + goto fail; + } else if (bound->cluster != ClusterId()) { + // Displace the entire cluster + Loc old_loc = ctx->getBelLocation(old_bel); + Loc bound_loc = ctx->getBelLocation(bound->bel); + Loc root_loc = ctx->getBelLocation(ctx->getClusterRootCell(bound->cluster)->bel); + BelId new_root = ctx->getBelByLocation(Loc(old_loc.x + (root_loc.x - bound_loc.x), + old_loc.y + (root_loc.y - bound_loc.y), + old_loc.z + (root_loc.z - bound_loc.z))); + if (new_root == BelId()) + goto fail; + displaced_clusters.emplace(bound->cluster, new_root); + } else { + // Single cell swap + if (used_bels.count(old_bel)) + goto fail; + used_bels.insert(old_bel); + if (!add_to_move(bound, bound->bel, old_bel)) + goto fail; + } + } else if (!ctx->checkBelAvail(db.second)) { + goto fail; + } + } + } + compute_total_change(); + // SA acceptance criteria + + if (!accept_move()) { + // SA fail + goto fail; + } + // Check validity rules + if (!bind_move()) + goto fail; + if (!check_validity()) + goto fail; + // Accepted! + commit_move(); + reset_move_state(); + return true; + fail: + revert_move(); + reset_move_state(); + return false; + } + + BelId random_bel_for_cell(CellInfo *cell, int force_z = -1) + { + IdString targetType = cell->type; + Loc curr_loc = ctx->getBelLocation(cell->bel); + int count = 0; + + int dx = g.radius, dy = g.radius; + if (cell->region != nullptr && cell->region->constr_bels) { + dx = std::min(g.cfg.hpwl_scale_x * g.radius, + (g.region_bounds[cell->region->name].x1 - g.region_bounds[cell->region->name].x0) + 1); + dy = std::min(g.cfg.hpwl_scale_y * g.radius, + (g.region_bounds[cell->region->name].y1 - g.region_bounds[cell->region->name].y0) + 1); + // Clamp location to within bounds + curr_loc.x = std::max(g.region_bounds[cell->region->name].x0, curr_loc.x); + curr_loc.x = std::min(g.region_bounds[cell->region->name].x1, curr_loc.x); + curr_loc.y = std::max(g.region_bounds[cell->region->name].y0, curr_loc.y); + curr_loc.y = std::min(g.region_bounds[cell->region->name].y1, curr_loc.y); + } + + FastBels::FastBelsData *bel_data; + auto type_cnt = g.bels.getBelsForCellType(targetType, &bel_data); + + while (true) { + int nx = rng.rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0); + int ny = rng.rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0); + if (type_cnt < 64) + nx = ny = 0; + if (nx >= int(bel_data->size())) + continue; + if (ny >= int(bel_data->at(nx).size())) + continue; + const auto &fb = bel_data->at(nx).at(ny); + if (fb.size() == 0) + continue; + BelId bel = fb.at(rng.rng(int(fb.size()))); + if (!bounds_check(bel)) + continue; + if (force_z != -1) { + Loc loc = ctx->getBelLocation(bel); + if (loc.z != force_z) + continue; + } + if (!cell->testRegion(bel)) + continue; + count++; + return bel; + } + } + + void run_iter() + { + setup_initial_state(); + n_accept = 0; + n_move = 0; + for (int m = 0; m < g.cfg.inner_iters; m++) { + for (auto cell : p.cells) { + if (cell->belStrength > STRENGTH_STRONG) + continue; + if (cell->cluster != ClusterId()) { + if (cell != ctx->getClusterRootCell(cell->cluster)) + continue; // only move cluster root + Loc old_loc = ctx->getBelLocation(cell->bel); + BelId new_root = random_bel_for_cell(cell, old_loc.z); + if (new_root == BelId() || new_root == cell->bel) + continue; + ++n_move; + if (chain_swap(cell, new_root)) + ++n_accept; + } else { + BelId new_bel = random_bel_for_cell(cell); + if (new_bel == BelId() || new_bel == cell->bel) + continue; + ++n_move; + if (single_cell_swap(cell, new_bel)) + ++n_accept; + } + } + } + } +}; + +struct ParallelRefine +{ + Context *ctx; + GlobalState g; + std::vector t; + ParallelRefine(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), g(ctx, cfg) + { + g.flat_nets.reserve(ctx->nets.size()); + for (auto &net : ctx->nets) { + net.second->udata = g.flat_nets.size(); + g.flat_nets.push_back(net.second.get()); + } + // Setup per thread context + for (int i = 0; i < cfg.threads; i++) { + t.emplace_back(ctx, g, i); + } + // Setup region bounds + for (auto ®ion : ctx->region) { + Region *r = region.second.get(); + NetBB bb; + if (r->constr_bels) { + bb.x0 = std::numeric_limits::max(); + bb.x1 = std::numeric_limits::min(); + bb.y0 = std::numeric_limits::max(); + bb.y1 = std::numeric_limits::min(); + for (auto bel : r->bels) { + Loc loc = ctx->getBelLocation(bel); + bb.x0 = std::min(bb.x0, loc.x); + bb.x1 = std::max(bb.x1, loc.x); + bb.y0 = std::min(bb.y0, loc.y); + bb.y1 = std::max(bb.y1, loc.y); + } + } else { + bb.x0 = 0; + bb.y0 = 0; + bb.x1 = ctx->getGridDimX(); + bb.y1 = ctx->getGridDimY(); + } + g.region_bounds[r->name] = bb; + } + // Setup fast bels map + pool cell_types_in_use; + for (auto &cell : ctx->cells) { + IdString cell_type = cell.second->type; + cell_types_in_use.insert(cell_type); + } + + for (auto cell_type : cell_types_in_use) { + g.bels.addCellType(cell_type); + } + }; + std::vector parts; + void do_partition() + { + parts.clear(); + parts.emplace_back(ctx); + bool yaxis = false; + while (parts.size() < t.size()) { + std::vector next(parts.size() * 2); + for (size_t i = 0; i < parts.size(); i++) { + // Randomly permute pivot every iteration so we get different thread boundaries + const float delta = 0.1; + float pivot = (0.5 - (delta / 2)) + (delta / 2) * (ctx->rng(10000) / 10000.0f); + parts.at(i).split(ctx, yaxis, pivot, next.at(i * 2), next.at(i * 2 + 1)); + } + std::swap(parts, next); + yaxis = !yaxis; + } + + NPNR_ASSERT(parts.size() == t.size()); + // TODO: thread pool to make this worthwhile... + std::vector workers; + for (size_t i = 0; i < t.size(); i++) { + workers.emplace_back([i, this]() { t.at(i).set_partition(parts.at(i)); }); + } + for (auto &w : workers) + w.join(); + } + + void update_global_costs() + { + g.last_bounds.resize(g.flat_nets.size()); + g.last_tmg_costs.resize(g.flat_nets.size()); + g.total_wirelen = 0; + g.total_timing_cost = 0; + for (size_t i = 0; i < g.flat_nets.size(); i++) { + NetInfo *ni = g.flat_nets.at(i); + if (g.skip_net(ni)) + continue; + g.last_bounds.at(i) = NetBB::compute(ctx, ni); + g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg); + if (!g.timing_skip_net(ni)) { + auto &tc = g.last_tmg_costs.at(i); + tc.resize(ni->users.capacity()); + for (auto usr : ni->users.enumerate()) { + tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index); + g.total_timing_cost += tc.at(usr.index.idx()); + } + } + } + } + void run() + { + + ScopeLock lock(ctx); + auto refine_start = std::chrono::high_resolution_clock::now(); + + g.tmg.setup_only = true; + g.tmg.setup(); + do_partition(); + log_info("Running parallel refinement with %d threads.\n", int(t.size())); + int iter = 1; + bool done = false; + update_global_costs(); + double avg_wirelen = g.total_wirelen; + wirelen_t min_wirelen = g.total_wirelen; + while (true) { + if (iter > 1) { + if (g.total_wirelen >= min_wirelen) { + done = true; + } else if (g.total_wirelen < min_wirelen) { + min_wirelen = g.total_wirelen; + } + int n_accept = 0, n_move = 0; + for (auto &t_data : t) { + n_accept += t_data.n_accept; + n_move += t_data.n_move; + } + double r_accept = n_accept / double(n_move); + if (g.total_wirelen < (0.95 * avg_wirelen) && g.total_wirelen > 0) { + avg_wirelen = 0.8 * avg_wirelen + 0.2 * g.total_wirelen; + } else { + if (r_accept > 0.15 && g.radius > 1) { + g.temperature *= 0.95; + } else { + g.temperature *= 0.8; + } + } + if ((iter % 10) == 0 && g.radius > 1) + --g.radius; + } + + if ((iter == 1) || ((iter % 5) == 0) || done) + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f\n", + iter, g.temperature, double(g.total_timing_cost), double(g.total_wirelen)); + + if (done) + break; + + do_partition(); + + std::vector workers; + workers.reserve(t.size()); + for (int j = 0; j < int(t.size()); j++) + workers.emplace_back([this, j]() { t.at(j).run_iter(); }); + for (auto &w : workers) + w.join(); + g.tmg.run(); + update_global_costs(); + iter++; + ctx->yield(); + } + auto refine_end = std::chrono::high_resolution_clock::now(); + log_info("Placement refine time %.02fs\n", std::chrono::duration(refine_end - refine_start).count()); + } +}; +} // namespace + +ParallelRefineCfg::ParallelRefineCfg(Context *ctx) +{ + timing_driven = ctx->setting("timing_driven"); + threads = ctx->setting("threads", 8); + // snap to nearest power of two; and minimum thread size + int actual_threads = 1; + while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size) + actual_threads *= 2; + threads = actual_threads; + hpwl_scale_x = 1; + hpwl_scale_y = 1; +} + +bool parallel_refine(Context *ctx, ParallelRefineCfg cfg) +{ + // TODO + ParallelRefine refine(ctx, cfg); + refine.run(); + timing_analysis(ctx); + return true; +} + +NEXTPNR_NAMESPACE_END + +#else /* !defined(__wasm) */ + +NEXTPNR_NAMESPACE_BEGIN + +bool parallel_refine(Context *ctx, ParallelRefineCfg cfg) { log_abort(); } + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/place/parallel_refine.h b/common/place/parallel_refine.h new file mode 100644 index 00000000..556317cd --- /dev/null +++ b/common/place/parallel_refine.h @@ -0,0 +1,43 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef PARALLEL_REFINE_H +#define PARALLEL_REFINE_H + +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct ParallelRefineCfg +{ + ParallelRefineCfg(Context *ctx); + bool timing_driven; + int threads; + int hpwl_scale_x, hpwl_scale_y; + double lambda = 0.5f; + float crit_exp = 8; + int inner_iters = 15; + int min_thread_size = 500; +}; + +bool parallel_refine(Context *ctx, ParallelRefineCfg cfg); + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/place/place_common.cc b/common/place/place_common.cc new file mode 100644 index 00000000..e03fca55 --- /dev/null +++ b/common/place/place_common.cc @@ -0,0 +1,487 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "place_common.h" +#include +#include "log.h" +#include "util.h" + +NEXTPNR_NAMESPACE_BEGIN + +// Get the total estimated wirelength for a net +wirelen_t get_net_metric(const Context *ctx, const NetInfo *net, MetricType type, float &tns) +{ + wirelen_t wirelength = 0; + CellInfo *driver_cell = net->driver.cell; + if (!driver_cell) + return 0; + if (driver_cell->bel == BelId()) + return 0; + bool driver_gb = ctx->getBelGlobalBuf(driver_cell->bel); + if (driver_gb) + return 0; + int clock_count; + bool timing_driven = ctx->setting("timing_driven") && type == MetricType::COST && + ctx->getPortTimingClass(driver_cell, net->driver.port, clock_count) != TMG_IGNORE; + delay_t negative_slack = 0; + delay_t worst_slack = std::numeric_limits::max(); + Loc driver_loc = ctx->getBelLocation(driver_cell->bel); + int xmin = driver_loc.x, xmax = driver_loc.x, ymin = driver_loc.y, ymax = driver_loc.y; + for (auto load : net->users) { + if (load.cell == nullptr) + continue; + CellInfo *load_cell = load.cell; + if (load_cell->bel == BelId()) + continue; + if (timing_driven) { + delay_t net_delay = ctx->predictArcDelay(net, load); + auto slack = load.budget - net_delay; + if (slack < 0) + negative_slack += slack; + worst_slack = std::min(slack, worst_slack); + } + + if (ctx->getBelGlobalBuf(load_cell->bel)) + continue; + Loc load_loc = ctx->getBelLocation(load_cell->bel); + + xmin = std::min(xmin, load_loc.x); + ymin = std::min(ymin, load_loc.y); + xmax = std::max(xmax, load_loc.x); + ymax = std::max(ymax, load_loc.y); + } + if (timing_driven) { + wirelength = wirelen_t( + (((ymax - ymin) + (xmax - xmin)) * std::min(5.0, (1.0 + std::exp(-ctx->getDelayNS(worst_slack) / 5))))); + } else { + wirelength = wirelen_t((ymax - ymin) + (xmax - xmin)); + } + + tns += ctx->getDelayNS(negative_slack); + return wirelength; +} + +// Get the total wirelength for a cell +wirelen_t get_cell_metric(const Context *ctx, const CellInfo *cell, MetricType type) +{ + std::set nets; + for (auto p : cell->ports) { + if (p.second.net) + nets.insert(p.second.net->name); + } + wirelen_t wirelength = 0; + float tns = 0; + for (auto n : nets) { + wirelength += get_net_metric(ctx, ctx->nets.at(n).get(), type, tns); + } + return wirelength; +} + +wirelen_t get_cell_metric_at_bel(const Context *ctx, CellInfo *cell, BelId bel, MetricType type) +{ + BelId oldBel = cell->bel; + cell->bel = bel; + wirelen_t wirelen = get_cell_metric(ctx, cell, type); + cell->bel = oldBel; + return wirelen; +} + +// Placing a single cell +bool place_single_cell(Context *ctx, CellInfo *cell, bool require_legality) +{ + bool all_placed = false; + int iters = 25; + while (!all_placed) { + BelId best_bel = BelId(); + wirelen_t best_wirelen = std::numeric_limits::max(), + best_ripup_wirelen = std::numeric_limits::max(); + CellInfo *ripup_target = nullptr; + BelId ripup_bel = BelId(); + if (cell->bel != BelId()) { + ctx->unbindBel(cell->bel); + } + IdString targetType = cell->type; + for (auto bel : ctx->getBels()) { + if (ctx->isValidBelForCellType(targetType, bel)) { + if (ctx->checkBelAvail(bel)) { + wirelen_t wirelen = get_cell_metric_at_bel(ctx, cell, bel, MetricType::COST); + if (iters >= 4) + wirelen += ctx->rng(25); + if (wirelen <= best_wirelen) { + best_wirelen = wirelen; + best_bel = bel; + } + } else { + wirelen_t wirelen = get_cell_metric_at_bel(ctx, cell, bel, MetricType::COST); + if (iters >= 4) + wirelen += ctx->rng(25); + if (wirelen <= best_ripup_wirelen) { + CellInfo *curr_cell = ctx->getBoundBelCell(bel); + if (curr_cell->belStrength < STRENGTH_STRONG) { + best_ripup_wirelen = wirelen; + ripup_bel = bel; + ripup_target = curr_cell; + } + } + } + } + } + if (best_bel == BelId()) { + if (iters == 0) { + log_error("failed to place cell '%s' of type '%s' (ripup iteration limit exceeded)\n", + cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + if (ripup_bel == BelId()) { + log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + --iters; + ctx->unbindBel(ripup_target->bel); + best_bel = ripup_bel; + } else { + ripup_target = nullptr; + all_placed = true; + } + ctx->bindBel(best_bel, cell, STRENGTH_WEAK); + if (require_legality && !ctx->isBelLocationValid(best_bel)) { + ctx->unbindBel(best_bel); + if (ripup_target != nullptr) { + ctx->bindBel(best_bel, ripup_target, STRENGTH_WEAK); + } + all_placed = false; + continue; + } + if (ctx->verbose) + log_info(" placed single cell '%s' at '%s'\n", cell->name.c_str(ctx), ctx->nameOfBel(best_bel)); + cell = ripup_target; + } + return true; +} + +class ConstraintLegaliseWorker +{ + private: + Context *ctx; + std::set rippedCells; + dict oldLocations; + dict> cluster2cells; + + class IncreasingDiameterSearch + { + public: + IncreasingDiameterSearch() : start(0), min(0), max(-1){}; + IncreasingDiameterSearch(int x) : start(x), min(x), max(x){}; + IncreasingDiameterSearch(int start, int min, int max) : start(start), min(min), max(max){}; + bool done() const { return (diameter > (max - min)); }; + int get() const + { + int val = start + sign * diameter; + val = std::max(val, min); + val = std::min(val, max); + return val; + } + + void next() + { + if (sign == 0) { + sign = 1; + diameter = 1; + } else if (sign == -1) { + sign = 1; + if ((start + sign * diameter) > max) + sign = -1; + ++diameter; + } else { + sign = -1; + if ((start + sign * diameter) < min) { + sign = 1; + ++diameter; + } + } + } + + void reset() + { + sign = 0; + diameter = 0; + } + + private: + int start, min, max; + int diameter = 0; + int sign = 0; + }; + + typedef dict CellLocations; + + // Check if a location would be suitable for a cell and all its constrained children + bool valid_loc_for(const CellInfo *cell, Loc loc, CellLocations &solution, pool &usedLocations) + { + BelId locBel = ctx->getBelByLocation(loc); + if (locBel == BelId()) + return false; + + if (cell->cluster == ClusterId()) { + if (!ctx->isValidBelForCellType(cell->type, locBel)) + return false; + if (!ctx->checkBelAvail(locBel)) { + CellInfo *confCell = ctx->getConflictingBelCell(locBel); + if (confCell->belStrength >= STRENGTH_STRONG) { + return false; + } + } + // Don't place at tiles where any strongly bound Bels exist, as we might need to rip them up later + for (auto tilebel : ctx->getBelsByTile(loc.x, loc.y)) { + CellInfo *tcell = ctx->getBoundBelCell(tilebel); + if (tcell && tcell->belStrength >= STRENGTH_STRONG) + return false; + } + usedLocations.insert(loc); + solution[cell->name] = loc; + } else { + std::vector> placement; + if (!ctx->getClusterPlacement(cell->cluster, locBel, placement)) + return false; + for (auto &p : placement) { + Loc p_loc = ctx->getBelLocation(p.second); + if (!ctx->checkBelAvail(p.second)) { + CellInfo *confCell = ctx->getConflictingBelCell(p.second); + if (confCell->belStrength >= STRENGTH_STRONG) { + return false; + } + } + // Don't place at tiles where any strongly bound Bels exist, as we might need to rip them up later + for (auto tilebel : ctx->getBelsByTile(p_loc.x, p_loc.y)) { + CellInfo *tcell = ctx->getBoundBelCell(tilebel); + if (tcell && tcell->belStrength >= STRENGTH_STRONG) + return false; + } + usedLocations.insert(p_loc); + solution[p.first->name] = p_loc; + } + } + + return true; + } + + // Set the strength to locked on all cells in chain + void lockdown_chain(CellInfo *root) + { + root->belStrength = STRENGTH_STRONG; + if (root->cluster != ClusterId()) + for (auto child : cluster2cells.at(root->cluster)) + child->belStrength = STRENGTH_STRONG; + } + + // Legalise placement constraints on a cell + bool legalise_cell(CellInfo *cell) + { + if (cell->cluster != ClusterId() && ctx->getClusterRootCell(cell->cluster) != cell) + return true; // Only process chain roots + if (constraints_satisfied(cell)) { + if (cell->cluster != ClusterId()) + lockdown_chain(cell); + } else { + IncreasingDiameterSearch xRootSearch, yRootSearch, zRootSearch; + Loc currentLoc; + if (cell->bel != BelId()) + currentLoc = ctx->getBelLocation(cell->bel); + else + currentLoc = oldLocations[cell->name]; + xRootSearch = IncreasingDiameterSearch(currentLoc.x, 0, ctx->getGridDimX() - 1); + yRootSearch = IncreasingDiameterSearch(currentLoc.y, 0, ctx->getGridDimY() - 1); + zRootSearch = IncreasingDiameterSearch(currentLoc.z, 0, ctx->getTileBelDimZ(currentLoc.x, currentLoc.y)); + + while (!xRootSearch.done()) { + Loc rootLoc; + + rootLoc.x = xRootSearch.get(); + rootLoc.y = yRootSearch.get(); + rootLoc.z = zRootSearch.get(); + zRootSearch.next(); + if (zRootSearch.done()) { + zRootSearch.reset(); + yRootSearch.next(); + if (yRootSearch.done()) { + yRootSearch.reset(); + xRootSearch.next(); + } + } + + CellLocations solution; + pool used; + if (valid_loc_for(cell, rootLoc, solution, used)) { + for (auto cp : solution) { + // First unbind all cells + if (ctx->cells.at(cp.first)->bel != BelId()) + ctx->unbindBel(ctx->cells.at(cp.first)->bel); + } + for (auto cp : solution) { + if (ctx->verbose) + log_info(" placing '%s' at (%d, %d, %d)\n", cp.first.c_str(ctx), cp.second.x, + cp.second.y, cp.second.z); + BelId target = ctx->getBelByLocation(cp.second); + if (!ctx->checkBelAvail(target)) { + CellInfo *confl_cell = ctx->getConflictingBelCell(target); + if (confl_cell != nullptr) { + if (ctx->verbose) + log_info(" '%s' already placed at '%s'\n", ctx->nameOf(confl_cell), + ctx->nameOfBel(confl_cell->bel)); + NPNR_ASSERT(confl_cell->belStrength < STRENGTH_STRONG); + ctx->unbindBel(target); + rippedCells.insert(confl_cell->name); + } + } + ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_STRONG); + rippedCells.erase(cp.first); + } + for (auto cp : solution) { + for (auto bel : ctx->getBelsByTile(cp.second.x, cp.second.y)) { + CellInfo *belCell = ctx->getBoundBelCell(bel); + if (belCell != nullptr && !solution.count(belCell->name)) { + if (!ctx->isBelLocationValid(bel)) { + NPNR_ASSERT(belCell->belStrength < STRENGTH_STRONG); + ctx->unbindBel(bel); + rippedCells.insert(belCell->name); + } + } + } + } + NPNR_ASSERT(constraints_satisfied(cell)); + return true; + } + } + return false; + } + return true; + } + + // Check if constraints are currently satisfied on a cell and its children + bool constraints_satisfied(const CellInfo *cell) { return get_constraints_distance(ctx, cell) == 0; } + + public: + ConstraintLegaliseWorker(Context *ctx) : ctx(ctx) + { + for (auto &cell : ctx->cells) { + if (cell.second->cluster != ClusterId()) + cluster2cells[cell.second->cluster].push_back(cell.second.get()); + } + }; + + unsigned print_stats(const char *point) + { + float distance_sum = 0; + float max_distance = 0; + unsigned moved_cells = 0; + unsigned unplaced_cells = 0; + for (auto orig : oldLocations) { + if (ctx->cells.at(orig.first)->bel == BelId()) { + unplaced_cells++; + continue; + } + Loc newLoc = ctx->getBelLocation(ctx->cells.at(orig.first)->bel); + if (newLoc != orig.second) { + float distance = std::sqrt(std::pow(newLoc.x - orig.second.x, 2) + pow(newLoc.y - orig.second.y, 2)); + moved_cells++; + distance_sum += distance; + if (distance > max_distance) + max_distance = distance; + } + } + log_info(" moved %d cells, %d unplaced (after %s)\n", moved_cells, unplaced_cells, point); + if (moved_cells > 0) { + log_info(" average distance %f\n", (distance_sum / moved_cells)); + log_info(" maximum distance %f\n", max_distance); + } + return moved_cells + unplaced_cells; + } + + int legalise_constraints() + { + log_info("Legalising relative constraints...\n"); + for (auto &cell : ctx->cells) { + oldLocations[cell.first] = ctx->getBelLocation(cell.second->bel); + } + for (auto &cell : ctx->cells) { + bool res = legalise_cell(cell.second.get()); + if (!res) { + log_error("failed to place chain starting at cell '%s'\n", cell.first.c_str(ctx)); + return -1; + } + } + if (print_stats("legalising chains") == 0) + return 0; + for (auto rippedCell : rippedCells) { + bool res = place_single_cell(ctx, ctx->cells.at(rippedCell).get(), true); + if (!res) { + log_error("failed to place cell '%s' after relative constraint legalisation\n", rippedCell.c_str(ctx)); + return -1; + } + } + auto score = print_stats("replacing ripped up cells"); + for (auto &cell : ctx->cells) + if (get_constraints_distance(ctx, cell.second.get()) != 0) + log_error("constraint satisfaction check failed for cell '%s' at Bel '%s'\n", cell.first.c_str(ctx), + ctx->nameOfBel(cell.second->bel)); + return score; + } +}; + +bool legalise_relative_constraints(Context *ctx) { return ConstraintLegaliseWorker(ctx).legalise_constraints() > 0; } + +// Get the total distance from satisfied constraints for a cell +int get_constraints_distance(const Context *ctx, const CellInfo *cell) +{ + int dist = 0; + if (cell->bel == BelId()) + return 100000; + Loc loc = ctx->getBelLocation(cell->bel); + + if (cell->cluster != ClusterId()) { + CellInfo *root = ctx->getClusterRootCell(cell->cluster); + if (root == cell) { + // parent + std::vector> placement; + if (!ctx->getClusterPlacement(cell->cluster, cell->bel, placement)) { + return 100000; + } else { + for (const auto &p : placement) { + if (p.first->bel == BelId()) + return 100000; + Loc c_loc = ctx->getBelLocation(p.first->bel); + Loc p_loc = ctx->getBelLocation(p.second); + dist += std::abs(c_loc.x - p_loc.x); + dist += std::abs(c_loc.y - p_loc.y); + dist += std::abs(c_loc.z - p_loc.z); + } + } + } else { + // child + if (root->bel == BelId()) + return 100000; + Loc root_loc = ctx->getBelLocation(root->bel); + Loc offset = ctx->getClusterOffset(cell); + dist += std::abs((root_loc.x + offset.x) - loc.x); + dist += std::abs((root_loc.y + offset.y) - loc.y); + } + } + + return dist; +} + +NEXTPNR_NAMESPACE_END diff --git a/common/place/place_common.h b/common/place/place_common.h new file mode 100644 index 00000000..5e5cbee3 --- /dev/null +++ b/common/place/place_common.h @@ -0,0 +1,55 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef PLACE_COMMON_H +#define PLACE_COMMON_H + +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +typedef int64_t wirelen_t; + +enum class MetricType +{ + COST, + WIRELENGTH +}; + +// Return the wirelength of a net +wirelen_t get_net_metric(const Context *ctx, const NetInfo *net, MetricType type, float &tns); + +// Return the wirelength of all nets connected to a cell +wirelen_t get_cell_metric(const Context *ctx, const CellInfo *cell, MetricType type); + +// Return the wirelength of all nets connected to a cell, when the cell is at a given bel +wirelen_t get_cell_metric_at_bel(const Context *ctx, CellInfo *cell, BelId bel, MetricType type); + +// Place a single cell in the lowest wirelength Bel available, optionally requiring validity check +bool place_single_cell(Context *ctx, CellInfo *cell, bool require_legality); + +// Modify a design s.t. all relative placement constraints are satisfied +bool legalise_relative_constraints(Context *ctx); + +// Get the total distance from satisfied constraints for a cell +int get_constraints_distance(const Context *ctx, const CellInfo *cell); + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/place/placer1.cc b/common/place/placer1.cc new file mode 100644 index 00000000..a6ba3895 --- /dev/null +++ b/common/place/placer1.cc @@ -0,0 +1,1317 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 Claire Xenia Wolf + * Copyright (C) 2018 gatecat + * + * Simulated annealing implementation based on arachne-pnr + * Copyright (C) 2015-2018 Cotton Seed + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "placer1.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fast_bels.h" +#include "log.h" +#include "place_common.h" +#include "scope_lock.h" +#include "timing.h" +#include "util.h" + +NEXTPNR_NAMESPACE_BEGIN + +class SAPlacer +{ + private: + struct BoundingBox + { + // Actual bounding box + int x0 = 0, x1 = 0, y0 = 0, y1 = 0; + // Number of cells at each extremity + int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; + wirelen_t hpwl(const Placer1Cfg &cfg) const + { + return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); + } + }; + + public: + SAPlacer(Context *ctx, Placer1Cfg cfg) + : ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg), tmg(ctx) + { + for (auto bel : ctx->getBels()) { + Loc loc = ctx->getBelLocation(bel); + max_x = std::max(max_x, loc.x); + max_y = std::max(max_y, loc.y); + } + diameter = std::max(max_x, max_y) + 1; + + pool cell_types_in_use; + for (auto &cell : ctx->cells) { + IdString cell_type = cell.second->type; + cell_types_in_use.insert(cell_type); + } + + for (auto cell_type : cell_types_in_use) { + fast_bels.addCellType(cell_type); + } + + net_bounds.resize(ctx->nets.size()); + net_arc_tcost.resize(ctx->nets.size()); + old_udata.reserve(ctx->nets.size()); + net_by_udata.reserve(ctx->nets.size()); + decltype(NetInfo::udata) n = 0; + for (auto &net : ctx->nets) { + old_udata.emplace_back(net.second->udata); + net_arc_tcost.at(n).resize(net.second->users.capacity()); + net.second->udata = n++; + net_by_udata.push_back(net.second.get()); + } + for (auto ®ion : ctx->region) { + Region *r = region.second.get(); + BoundingBox bb; + if (r->constr_bels) { + bb.x0 = std::numeric_limits::max(); + bb.x1 = std::numeric_limits::min(); + bb.y0 = std::numeric_limits::max(); + bb.y1 = std::numeric_limits::min(); + for (auto bel : r->bels) { + Loc loc = ctx->getBelLocation(bel); + bb.x0 = std::min(bb.x0, loc.x); + bb.x1 = std::max(bb.x1, loc.x); + bb.y0 = std::min(bb.y0, loc.y); + bb.y1 = std::max(bb.y1, loc.y); + } + } else { + bb.x0 = 0; + bb.y0 = 0; + bb.x1 = max_x; + bb.y1 = max_y; + } + region_bounds[r->name] = bb; + } + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->cluster == ClusterId()) + continue; + cluster2cell[ci->cluster].push_back(ci); + } + } + + ~SAPlacer() + { + for (auto &net : ctx->nets) + net.second->udata = old_udata[net.second->udata]; + } + + bool place(bool refine = false) + { + log_break(); + + ScopeLock lock(ctx); + + size_t placed_cells = 0; + std::vector autoplaced; + std::vector chain_basis; + if (!refine) { + // Initial constraints placer + for (auto &cell_entry : ctx->cells) { + CellInfo *cell = cell_entry.second.get(); + auto loc = cell->attrs.find(ctx->id("BEL")); + if (loc != cell->attrs.end()) { + std::string loc_name = loc->second.as_string(); + BelId bel = ctx->getBelByNameStr(loc_name); + if (bel == BelId()) { + log_error("No Bel named \'%s\' located for " + "this chip (processing BEL attribute on \'%s\')\n", + loc_name.c_str(), cell->name.c_str(ctx)); + } + + if (!ctx->isValidBelForCellType(cell->type, bel)) { + IdString bel_type = ctx->getBelType(bel); + log_error("Bel \'%s\' of type \'%s\' does not match cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + auto bound_cell = ctx->getBoundBelCell(bel); + if (bound_cell) { + log_error( + "Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", + cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); + } + + ctx->bindBel(bel, cell, STRENGTH_USER); + if (!ctx->isBelLocationValid(bel)) { + IdString bel_type = ctx->getBelType(bel); + log_error("Bel \'%s\' of type \'%s\' is not valid for cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + locked_bels.insert(bel); + placed_cells++; + } + } + int constr_placed_cells = placed_cells; + log_info("Placed %d cells based on constraints.\n", int(placed_cells)); + ctx->yield(); + + // Sort to-place cells for deterministic initial placement + + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->bel == BelId()) { + autoplaced.push_back(cell.second.get()); + } + } + std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; }); + ctx->shuffle(autoplaced); + auto iplace_start = std::chrono::high_resolution_clock::now(); + // Place cells randomly initially + log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size())); + + for (auto cell : autoplaced) { + place_initial(cell); + placed_cells++; + if ((placed_cells - constr_placed_cells) % 500 == 0) + log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), + int(autoplaced.size())); + } + if ((placed_cells - constr_placed_cells) % 500 != 0) + log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), + int(autoplaced.size())); + if (cfg.budgetBased && cfg.slack_redist_iter > 0) + assign_budget(ctx); + ctx->yield(); + auto iplace_end = std::chrono::high_resolution_clock::now(); + log_info("Initial placement time %.02fs\n", + std::chrono::duration(iplace_end - iplace_start).count()); + log_info("Running simulated annealing placer.\n"); + } else { + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->belStrength > STRENGTH_STRONG) { + continue; + } else if (ci->cluster != ClusterId()) { + if (ctx->getClusterRootCell(ci->cluster) == ci) + chain_basis.push_back(ci); + else + continue; + } else { + autoplaced.push_back(ci); + } + } + require_legal = false; + diameter = 3; + log_info("Running simulated annealing placer for refinement.\n"); + } + auto saplace_start = std::chrono::high_resolution_clock::now(); + + // Invoke timing analysis to obtain criticalities + tmg.setup_only = true; + if (!cfg.budgetBased) + tmg.setup(); + + // Calculate costs after initial placement + setup_costs(); + moveChange.init(this); + curr_wirelen_cost = total_wirelen_cost(); + curr_timing_cost = total_timing_cost(); + last_wirelen_cost = curr_wirelen_cost; + last_timing_cost = curr_timing_cost; + + if (cfg.netShareWeight > 0) + setup_nets_by_tile(); + + wirelen_t avg_wirelen = curr_wirelen_cost; + wirelen_t min_wirelen = curr_wirelen_cost; + + int n_no_progress = 0; + temp = refine ? 1e-7 : cfg.startTemp; + + // Main simulated annealing loop + for (int iter = 1;; iter++) { + n_move = n_accept = 0; + improved = false; + + if (iter % 5 == 0 || iter == 1) + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f\n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); + + for (int m = 0; m < 15; ++m) { + // Loop through all automatically placed cells + for (auto cell : autoplaced) { + // Find another random Bel for this cell + BelId try_bel = random_bel_for_cell(cell); + // If valid, try and swap to a new position and see if + // the new position is valid/worthwhile + if (try_bel != BelId() && try_bel != cell->bel) + try_swap_position(cell, try_bel); + } + // Also try swapping chains, if applicable + for (auto cb : chain_basis) { + Loc chain_base_loc = ctx->getBelLocation(cb->bel); + BelId try_base = random_bel_for_cell(cb, chain_base_loc.z); + if (try_base != BelId() && try_base != cb->bel) + try_swap_chain(cb, try_base); + } + } + + if (ctx->debug) { + // Verify correctness of incremental wirelen updates + for (size_t i = 0; i < net_bounds.size(); i++) { + auto net = net_by_udata[i]; + if (ignore_net(net)) + continue; + auto &incr = net_bounds.at(i), gold = get_net_bounds(net); + NPNR_ASSERT(incr.x0 == gold.x0); + NPNR_ASSERT(incr.x1 == gold.x1); + NPNR_ASSERT(incr.y0 == gold.y0); + NPNR_ASSERT(incr.y1 == gold.y1); + NPNR_ASSERT(incr.nx0 == gold.nx0); + NPNR_ASSERT(incr.nx1 == gold.nx1); + NPNR_ASSERT(incr.ny0 == gold.ny0); + NPNR_ASSERT(incr.ny1 == gold.ny1); + } + } + + if (curr_wirelen_cost < min_wirelen) { + min_wirelen = curr_wirelen_cost; + improved = true; + } + + // Heuristic to improve placement on the 8k + if (improved) + n_no_progress = 0; + else + n_no_progress++; + + if (temp <= 1e-7 && n_no_progress >= (refine ? 1 : 5)) { + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f \n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); + break; + } + + double Raccept = double(n_accept) / double(n_move); + + int M = std::max(max_x, max_y) + 1; + + if (ctx->verbose) + log("iter #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); + + if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { + avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; + } else { + double diam_next = diameter * (1.0 - 0.44 + Raccept); + diameter = std::max(1, std::min(M, int(diam_next + 0.5))); + if (Raccept > 0.96) { + temp *= 0.5; + } else if (Raccept > 0.8) { + temp *= 0.9; + } else if (Raccept > 0.15 && diameter > 1) { + temp *= 0.95; + } else { + temp *= 0.8; + } + } + // Once cooled below legalise threshold, run legalisation and start requiring + // legal moves only + if (diameter < legalise_dia && require_legal) { + if (legalise_relative_constraints(ctx)) { + // Only increase temperature if something was moved + autoplaced.clear(); + chain_basis.clear(); + for (auto &cell : ctx->cells) { + if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->cluster != ClusterId() && + ctx->getClusterRootCell(cell.second->cluster) == cell.second.get()) + chain_basis.push_back(cell.second.get()); + else if (cell.second->belStrength < STRENGTH_STRONG) + autoplaced.push_back(cell.second.get()); + } + // temp = post_legalise_temp; + // diameter = std::min(M, diameter * post_legalise_dia_scale); + ctx->shuffle(autoplaced); + + // Legalisation is a big change so force a slack redistribution here + if (cfg.slack_redist_iter > 0 && cfg.budgetBased) + assign_budget(ctx, true /* quiet */); + } + require_legal = false; + } else if (cfg.budgetBased && cfg.slack_redist_iter > 0 && iter % cfg.slack_redist_iter == 0) { + assign_budget(ctx, true /* quiet */); + } + + // Invoke timing analysis to obtain criticalities + if (!cfg.budgetBased && cfg.timing_driven) + tmg.run(); + // Need to rebuild costs after criticalities change + setup_costs(); + // Reset incremental bounds + moveChange.reset(this); + moveChange.new_net_bounds = net_bounds; + + // Recalculate total metric entirely to avoid rounding errors + // accumulating over time + curr_wirelen_cost = total_wirelen_cost(); + curr_timing_cost = total_timing_cost(); + last_wirelen_cost = curr_wirelen_cost; + last_timing_cost = curr_timing_cost; + // Let the UI show visualization updates. + ctx->yield(); + } + + auto saplace_end = std::chrono::high_resolution_clock::now(); + log_info("SA placement time %.02fs\n", std::chrono::duration(saplace_end - saplace_start).count()); + + // Final post-placement validity check + ctx->yield(); + for (auto bel : ctx->getBels()) { + CellInfo *cell = ctx->getBoundBelCell(bel); + if (!ctx->isBelLocationValid(bel)) { + std::string cell_text = "no cell"; + if (cell != nullptr) + cell_text = std::string("cell '") + ctx->nameOf(cell) + "'"; + if (ctx->force) { + log_warning("post-placement validity check failed for Bel '%s' " + "(%s)\n", + ctx->nameOfBel(bel), cell_text.c_str()); + } else { + log_error("post-placement validity check failed for Bel '%s' " + "(%s)\n", + ctx->nameOfBel(bel), cell_text.c_str()); + } + } + } + timing_analysis(ctx); + + return true; + } + + private: + // Initial random placement + void place_initial(CellInfo *cell) + { + bool all_placed = false; + int iters = 25; + while (!all_placed) { + BelId best_bel = BelId(); + uint64_t best_score = std::numeric_limits::max(), + best_ripup_score = std::numeric_limits::max(); + CellInfo *ripup_target = nullptr; + BelId ripup_bel = BelId(); + if (cell->bel != BelId()) { + ctx->unbindBel(cell->bel); + } + IdString targetType = cell->type; + + auto proc_bel = [&](BelId bel) { + if (ctx->isValidBelForCellType(targetType, bel)) { + if (ctx->checkBelAvail(bel)) { + uint64_t score = ctx->rng64(); + if (score <= best_score) { + best_score = score; + best_bel = bel; + } + } else { + uint64_t score = ctx->rng64(); + CellInfo *bound_cell = ctx->getBoundBelCell(bel); + if (score <= best_ripup_score && bound_cell->belStrength < STRENGTH_STRONG) { + best_ripup_score = score; + ripup_target = bound_cell; + ripup_bel = bel; + } + } + } + }; + + if (cell->region != nullptr && cell->region->constr_bels) { + for (auto bel : cell->region->bels) { + proc_bel(bel); + } + } else { + for (auto bel : ctx->getBels()) { + proc_bel(bel); + } + } + + if (best_bel == BelId()) { + if (iters == 0 || ripup_bel == BelId()) + log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx)); + --iters; + ctx->unbindBel(ripup_target->bel); + best_bel = ripup_bel; + } else { + ripup_target = nullptr; + all_placed = true; + } + ctx->bindBel(best_bel, cell, STRENGTH_WEAK); + + if (!ctx->isBelLocationValid(best_bel)) { + ctx->unbindBel(best_bel); + if (ripup_target != nullptr) { + ctx->bindBel(best_bel, ripup_target, STRENGTH_WEAK); + } + all_placed = false; + continue; + } + + // Back annotate location + cell->attrs[ctx->id("BEL")] = ctx->getBelName(cell->bel).str(ctx); + cell = ripup_target; + } + } + + // Attempt a SA position swap, return true on success or false on failure + bool try_swap_position(CellInfo *cell, BelId newBel) + { + static const double epsilon = 1e-20; + moveChange.reset(this); + if (!require_legal && cell->cluster != ClusterId()) + return false; + BelId oldBel = cell->bel; + CellInfo *other_cell = ctx->getBoundBelCell(newBel); + if (!require_legal && other_cell != nullptr && + (other_cell->cluster != ClusterId() || other_cell->belStrength > STRENGTH_WEAK)) { + return false; + } + int old_dist = get_constraints_distance(ctx, cell); + int new_dist; + if (other_cell != nullptr) + old_dist += get_constraints_distance(ctx, other_cell); + double delta = 0; + + if (!ctx->isValidBelForCellType(cell->type, newBel)) { + return false; + } + if (other_cell != nullptr && !ctx->isValidBelForCellType(other_cell->type, oldBel)) { + return false; + } + + int net_delta_score = 0; + if (cfg.netShareWeight > 0) + net_delta_score += update_nets_by_tile(cell, ctx->getBelLocation(cell->bel), ctx->getBelLocation(newBel)); + + ctx->unbindBel(oldBel); + if (other_cell != nullptr) { + ctx->unbindBel(newBel); + } + + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + + if (other_cell != nullptr) { + ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + net_delta_score += + update_nets_by_tile(other_cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); + } + + add_move_cell(moveChange, cell, oldBel); + + if (other_cell != nullptr) { + add_move_cell(moveChange, other_cell, newBel); + } + + // Always check both the new and old locations; as in some cases of dedicated routing ripping up a cell can deny + // use of a dedicated path and thus make a site illegal + if (!ctx->isBelLocationValid(newBel) || !ctx->isBelLocationValid(oldBel)) { + ctx->unbindBel(newBel); + if (other_cell != nullptr) + ctx->unbindBel(oldBel); + goto swap_fail; + } + + // Recalculate metrics for all nets touched by the perturbation + compute_cost_changes(moveChange); + + new_dist = get_constraints_distance(ctx, cell); + if (other_cell != nullptr) + new_dist += get_constraints_distance(ctx, other_cell); + delta = lambda * (moveChange.timing_delta / std::max(last_timing_cost, epsilon)) + + (1 - lambda) * (double(moveChange.wirelen_delta) / std::max(last_wirelen_cost, epsilon)); + delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; + if (cfg.netShareWeight > 0) + delta += -cfg.netShareWeight * (net_delta_score / std::max(total_net_share, epsilon)); + n_move++; + // SA acceptance criteria + if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { + n_accept++; + } else { + if (other_cell != nullptr) + ctx->unbindBel(oldBel); + ctx->unbindBel(newBel); + goto swap_fail; + } + commit_cost_changes(moveChange); +#if 0 + log_info("swap %s -> %s\n", cell->name.c_str(ctx), ctx->nameOfBel(newBel)); + if (other_cell != nullptr) + log_info("swap %s -> %s\n", other_cell->name.c_str(ctx), ctx->nameOfBel(oldBel)); +#endif + return true; + swap_fail: + ctx->bindBel(oldBel, cell, STRENGTH_WEAK); + if (other_cell != nullptr) { + ctx->bindBel(newBel, other_cell, STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + update_nets_by_tile(other_cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel)); + } + if (cfg.netShareWeight > 0) + update_nets_by_tile(cell, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); + return false; + } + + // Swap the Bel of a cell with another, return the original location + BelId swap_cell_bels(CellInfo *cell, BelId newBel) + { + BelId oldBel = cell->bel; +#if 0 + log_info("%s old: %s new: %s\n", cell->name.c_str(ctx), ctx->nameOfBel(cell->bel), ctx->nameOfBel(newBel)); +#endif + CellInfo *bound = ctx->getBoundBelCell(newBel); + if (bound != nullptr) + ctx->unbindBel(newBel); + ctx->unbindBel(oldBel); + ctx->bindBel(newBel, cell, (cell->cluster != ClusterId()) ? STRENGTH_STRONG : STRENGTH_WEAK); + if (bound != nullptr) { + ctx->bindBel(oldBel, bound, (bound->cluster != ClusterId()) ? STRENGTH_STRONG : STRENGTH_WEAK); + if (cfg.netShareWeight > 0) + update_nets_by_tile(bound, ctx->getBelLocation(newBel), ctx->getBelLocation(oldBel)); + } + if (cfg.netShareWeight > 0) + update_nets_by_tile(cell, ctx->getBelLocation(oldBel), ctx->getBelLocation(newBel)); + return oldBel; + } + + // Attempt to swap a chain with a non-chain + bool try_swap_chain(CellInfo *cell, BelId newBase) + { + std::vector> cell_rel; + dict moved_cells; + double delta = 0; + int orig_share_cost = total_net_share; + moveChange.reset(this); +#if CHAIN_DEBUG + log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx)); +#endif + std::queue> displaced_clusters; + displaced_clusters.emplace(cell->cluster, newBase); + while (!displaced_clusters.empty()) { + std::vector> dest_bels; + auto cursor = displaced_clusters.front(); +#if CHAIN_DEBUG + log_info("%d Cluster %s\n", __LINE__, cursor.first.c_str(ctx)); +#endif + displaced_clusters.pop(); + if (!ctx->getClusterPlacement(cursor.first, cursor.second, dest_bels)) + goto swap_fail; + for (const auto &db : dest_bels) { + // Ensure the cluster is ripped up + if (db.first->bel != BelId()) { + moved_cells[db.first->name] = db.first->bel; +#if CHAIN_DEBUG + log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(db.first->bel)); +#endif + ctx->unbindBel(db.first->bel); + } + } + for (const auto &db : dest_bels) { + CellInfo *bound = ctx->getBoundBelCell(db.second); + BelId old_bel = moved_cells.at(db.first->name); + if (!ctx->checkBelAvail(old_bel) && bound != nullptr) { + // Simple swap no longer possible + goto swap_fail; + } + if (bound != nullptr) { + if (moved_cells.count(bound->name)) { + // Don't move a cell multiple times in the same go + goto swap_fail; + } else if (bound->belStrength > STRENGTH_STRONG) { + goto swap_fail; + } else if (bound->cluster != ClusterId()) { + // Displace the entire cluster + Loc old_loc = ctx->getBelLocation(old_bel); + Loc bound_loc = ctx->getBelLocation(bound->bel); + Loc root_loc = ctx->getBelLocation(ctx->getClusterRootCell(bound->cluster)->bel); + BelId new_root = ctx->getBelByLocation(Loc(old_loc.x + (root_loc.x - bound_loc.x), + old_loc.y + (root_loc.y - bound_loc.y), + old_loc.z + (root_loc.z - bound_loc.z))); + if (new_root == BelId()) + goto swap_fail; + for (auto cluster_cell : cluster2cell.at(bound->cluster)) { + moved_cells[cluster_cell->name] = cluster_cell->bel; +#if CHAIN_DEBUG + log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(cluster_cell->bel)); +#endif + ctx->unbindBel(cluster_cell->bel); + } + displaced_clusters.emplace(bound->cluster, new_root); + } else { + // Just a single cell to move + moved_cells[bound->name] = bound->bel; +#if CHAIN_DEBUG + log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(bound->bel)); + log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(old_bel), ctx->nameOf(bound)); +#endif + ctx->unbindBel(bound->bel); + ctx->bindBel(old_bel, bound, STRENGTH_WEAK); + } + } else if (!ctx->checkBelAvail(db.second)) { + goto swap_fail; + } + // All those shenanigans should now mean the target bel is free to use +#if CHAIN_DEBUG + log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(db.second), ctx->nameOf(db.first)); +#endif + ctx->bindBel(db.second, db.first, STRENGTH_WEAK); + } + } + + for (const auto &mm : moved_cells) { + CellInfo *cell = ctx->cells.at(mm.first).get(); + add_move_cell(moveChange, cell, moved_cells.at(cell->name)); + if (cfg.netShareWeight > 0) + update_nets_by_tile(cell, ctx->getBelLocation(moved_cells.at(cell->name)), + ctx->getBelLocation(cell->bel)); + if (!ctx->isBelLocationValid(cell->bel) || !cell->testRegion(cell->bel)) + goto swap_fail; + } +#if CHAIN_DEBUG + log_info("legal chain swap %s\n", cell->name.c_str(ctx)); +#endif + compute_cost_changes(moveChange); + delta = lambda * (moveChange.timing_delta / last_timing_cost) + + (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); + if (cfg.netShareWeight > 0) { + delta += + cfg.netShareWeight * (orig_share_cost - total_net_share) / std::max(total_net_share, 1e-20); + } + n_move++; + // SA acceptance criteria + if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { + n_accept++; +#if CHAIN_DEBUG + log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); +#endif + } else { + goto swap_fail; + } + commit_cost_changes(moveChange); + return true; + swap_fail: +#if CHAIN_DEBUG + log_info("Swap failed\n"); +#endif + for (auto cell_pair : moved_cells) { + CellInfo *cell = ctx->cells.at(cell_pair.first).get(); + if (cell->bel != BelId()) { +#if CHAIN_DEBUG + log_info("%d unbind %s\n", __LINE__, ctx->nameOfBel(cell->bel)); +#endif + ctx->unbindBel(cell->bel); + } + } + for (auto cell_pair : moved_cells) { + CellInfo *cell = ctx->cells.at(cell_pair.first).get(); +#if CHAIN_DEBUG + log_info("%d bind %s %s\n", __LINE__, ctx->nameOfBel(cell_pair.second), cell->name.c_str(ctx)); +#endif + ctx->bindBel(cell_pair.second, cell, STRENGTH_WEAK); + } + return false; + } + + // Find a random Bel of the correct type for a cell, within the specified + // diameter + BelId random_bel_for_cell(CellInfo *cell, int force_z = -1) + { + IdString targetType = cell->type; + Loc curr_loc = ctx->getBelLocation(cell->bel); + int count = 0; + + int dx = diameter, dy = diameter; + if (cell->region != nullptr && cell->region->constr_bels) { + dx = std::min(cfg.hpwl_scale_x * diameter, + (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1); + dy = std::min(cfg.hpwl_scale_y * diameter, + (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1); + // Clamp location to within bounds + curr_loc.x = std::max(region_bounds[cell->region->name].x0, curr_loc.x); + curr_loc.x = std::min(region_bounds[cell->region->name].x1, curr_loc.x); + curr_loc.y = std::max(region_bounds[cell->region->name].y0, curr_loc.y); + curr_loc.y = std::min(region_bounds[cell->region->name].y1, curr_loc.y); + } + + FastBels::FastBelsData *bel_data; + auto type_cnt = fast_bels.getBelsForCellType(targetType, &bel_data); + + while (true) { + int nx = ctx->rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0); + int ny = ctx->rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0); + if (cfg.minBelsForGridPick >= 0 && type_cnt < cfg.minBelsForGridPick) + nx = ny = 0; + if (nx >= int(bel_data->size())) + continue; + if (ny >= int(bel_data->at(nx).size())) + continue; + const auto &fb = bel_data->at(nx).at(ny); + if (fb.size() == 0) + continue; + BelId bel = fb.at(ctx->rng(int(fb.size()))); + if (force_z != -1) { + Loc loc = ctx->getBelLocation(bel); + if (loc.z != force_z) + continue; + } + if (!cell->testRegion(bel)) + continue; + if (locked_bels.find(bel) != locked_bels.end()) + continue; + count++; + return bel; + } + } + + // Return true if a net is to be entirely ignored + inline bool ignore_net(NetInfo *net) + { + return net->driver.cell == nullptr || net->driver.cell->bel == BelId() || + ctx->getBelGlobalBuf(net->driver.cell->bel); + } + + // Get the bounding box for a net + inline BoundingBox get_net_bounds(NetInfo *net) + { + BoundingBox bb; + NPNR_ASSERT(net->driver.cell != nullptr); + Loc dloc = ctx->getBelLocation(net->driver.cell->bel); + bb.x0 = dloc.x; + bb.x1 = dloc.x; + bb.y0 = dloc.y; + bb.y1 = dloc.y; + bb.nx0 = 1; + bb.nx1 = 1; + bb.ny0 = 1; + bb.ny1 = 1; + for (auto user : net->users) { + if (user.cell->bel == BelId()) + continue; + Loc uloc = ctx->getBelLocation(user.cell->bel); + if (bb.x0 == uloc.x) + ++bb.nx0; + else if (uloc.x < bb.x0) { + bb.x0 = uloc.x; + bb.nx0 = 1; + } + if (bb.x1 == uloc.x) + ++bb.nx1; + else if (uloc.x > bb.x1) { + bb.x1 = uloc.x; + bb.nx1 = 1; + } + if (bb.y0 == uloc.y) + ++bb.ny0; + else if (uloc.y < bb.y0) { + bb.y0 = uloc.y; + bb.ny0 = 1; + } + if (bb.y1 == uloc.y) + ++bb.ny1; + else if (uloc.y > bb.y1) { + bb.y1 = uloc.y; + bb.ny1 = 1; + } + } + + return bb; + } + + // Get the timing cost for an arc of a net + inline double get_timing_cost(NetInfo *net, const PortRef &user) + { + int cc; + if (net->driver.cell == nullptr) + return 0; + if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE) + return 0; + if (cfg.budgetBased) { + double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user)); + return std::min(10.0, std::exp(delay - ctx->getDelayNS(user.budget) / 10)); + } else { + float crit = tmg.get_criticality(CellPortKey(user)); + double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user)); + return delay * std::pow(crit, crit_exp); + } + } + + // Set up the cost maps + void setup_costs() + { + for (auto &net : ctx->nets) { + NetInfo *ni = net.second.get(); + if (ignore_net(ni)) + continue; + net_bounds[ni->udata] = get_net_bounds(ni); + if (cfg.timing_driven && int(ni->users.entries()) < cfg.timingFanoutThresh) + for (auto usr : ni->users.enumerate()) + net_arc_tcost[ni->udata][usr.index.idx()] = get_timing_cost(ni, usr.value); + } + } + + // Get the total wiring cost for the design + wirelen_t total_wirelen_cost() + { + wirelen_t cost = 0; + for (const auto &net : net_bounds) + cost += net.hpwl(cfg); + return cost; + } + + // Get the total timing cost for the design + double total_timing_cost() + { + double cost = 0; + for (const auto &net : net_arc_tcost) { + for (auto arc_cost : net) { + cost += arc_cost; + } + } + return cost; + } + + // Cost-change-related data for a move + struct MoveChangeData + { + + enum BoundChangeType + { + NO_CHANGE, + CELL_MOVED_INWARDS, + CELL_MOVED_OUTWARDS, + FULL_RECOMPUTE + }; + + std::vector bounds_changed_nets_x, bounds_changed_nets_y; + std::vector>> changed_arcs; + + std::vector already_bounds_changed_x, already_bounds_changed_y; + std::vector> already_changed_arcs; + + std::vector new_net_bounds; + std::vector>, double>> new_arc_costs; + + wirelen_t wirelen_delta = 0; + double timing_delta = 0; + + void init(SAPlacer *p) + { + already_bounds_changed_x.resize(p->ctx->nets.size()); + already_bounds_changed_y.resize(p->ctx->nets.size()); + already_changed_arcs.resize(p->ctx->nets.size()); + for (auto &net : p->ctx->nets) { + already_changed_arcs.at(net.second->udata).resize(net.second->users.capacity()); + } + new_net_bounds = p->net_bounds; + } + + void reset(SAPlacer *p) + { + for (auto bc : bounds_changed_nets_x) { + new_net_bounds[bc] = p->net_bounds[bc]; + already_bounds_changed_x[bc] = NO_CHANGE; + } + for (auto bc : bounds_changed_nets_y) { + new_net_bounds[bc] = p->net_bounds[bc]; + already_bounds_changed_y[bc] = NO_CHANGE; + } + for (const auto &tc : changed_arcs) + already_changed_arcs[tc.first][tc.second.idx()] = false; + bounds_changed_nets_x.clear(); + bounds_changed_nets_y.clear(); + changed_arcs.clear(); + new_arc_costs.clear(); + wirelen_delta = 0; + timing_delta = 0; + } + + } moveChange; + + void add_move_cell(MoveChangeData &mc, CellInfo *cell, BelId old_bel) + { + Loc curr_loc = ctx->getBelLocation(cell->bel); + Loc old_loc = ctx->getBelLocation(old_bel); + // Check net bounds + for (const auto &port : cell->ports) { + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (ignore_net(pn)) + continue; + BoundingBox &curr_bounds = mc.new_net_bounds[pn->udata]; + // Incremental bounding box updates + // Note that everything other than full updates are applied immediately rather than being queued, + // so further updates to the same net in the same move are dealt with correctly. + // If a full update is already queued, this can be considered a no-op + if (mc.already_bounds_changed_x[pn->udata] != MoveChangeData::FULL_RECOMPUTE) { + // Bounds x0 + if (curr_loc.x < curr_bounds.x0) { + // Further out than current bounds x0 + curr_bounds.x0 = curr_loc.x; + curr_bounds.nx0 = 1; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { + // Checking already_bounds_changed_x ensures that each net is only added once + // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the + // overall cost change + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_x.push_back(pn->udata); + } + } else if (curr_loc.x == curr_bounds.x0 && old_loc.x > curr_bounds.x0) { + curr_bounds.nx0++; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_x.push_back(pn->udata); + } + } else if (old_loc.x == curr_bounds.x0 && curr_loc.x > curr_bounds.x0) { + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) + mc.bounds_changed_nets_x.push_back(pn->udata); + if (curr_bounds.nx0 == 1) { + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE; + } else { + curr_bounds.nx0--; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; + } + } + + // Bounds x1 + if (curr_loc.x > curr_bounds.x1) { + // Further out than current bounds x1 + curr_bounds.x1 = curr_loc.x; + curr_bounds.nx1 = 1; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { + // Checking already_bounds_changed_x ensures that each net is only added once + // to bounds_changed_nets, lest we add its HPWL change multiple times skewing the + // overall cost change + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_x.push_back(pn->udata); + } + } else if (curr_loc.x == curr_bounds.x1 && old_loc.x < curr_bounds.x1) { + curr_bounds.nx1++; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_x.push_back(pn->udata); + } + } else if (old_loc.x == curr_bounds.x1 && curr_loc.x < curr_bounds.x1) { + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) + mc.bounds_changed_nets_x.push_back(pn->udata); + if (curr_bounds.nx1 == 1) { + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::FULL_RECOMPUTE; + } else { + curr_bounds.nx1--; + if (mc.already_bounds_changed_x[pn->udata] == MoveChangeData::NO_CHANGE) + mc.already_bounds_changed_x[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; + } + } + } + if (mc.already_bounds_changed_y[pn->udata] != MoveChangeData::FULL_RECOMPUTE) { + // Bounds y0 + if (curr_loc.y < curr_bounds.y0) { + // Further out than current bounds y0 + curr_bounds.y0 = curr_loc.y; + curr_bounds.ny0 = 1; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_y.push_back(pn->udata); + } + } else if (curr_loc.y == curr_bounds.y0 && old_loc.y > curr_bounds.y0) { + curr_bounds.ny0++; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_y.push_back(pn->udata); + } + } else if (old_loc.y == curr_bounds.y0 && curr_loc.y > curr_bounds.y0) { + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) + mc.bounds_changed_nets_y.push_back(pn->udata); + if (curr_bounds.ny0 == 1) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE; + } else { + curr_bounds.ny0--; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; + } + } + + // Bounds y1 + if (curr_loc.y > curr_bounds.y1) { + // Further out than current bounds y1 + curr_bounds.y1 = curr_loc.y; + curr_bounds.ny1 = 1; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_y.push_back(pn->udata); + } + } else if (curr_loc.y == curr_bounds.y1 && old_loc.y < curr_bounds.y1) { + curr_bounds.ny1++; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_OUTWARDS; + mc.bounds_changed_nets_y.push_back(pn->udata); + } + } else if (old_loc.y == curr_bounds.y1 && curr_loc.y < curr_bounds.y1) { + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) + mc.bounds_changed_nets_y.push_back(pn->udata); + if (curr_bounds.ny1 == 1) { + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::FULL_RECOMPUTE; + } else { + curr_bounds.ny1--; + if (mc.already_bounds_changed_y[pn->udata] == MoveChangeData::NO_CHANGE) + mc.already_bounds_changed_y[pn->udata] = MoveChangeData::CELL_MOVED_INWARDS; + } + } + } + + if (cfg.timing_driven && int(pn->users.entries()) < cfg.timingFanoutThresh) { + // Output ports - all arcs change timing + if (port.second.type == PORT_OUT) { + int cc; + TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); + if (cls != TMG_IGNORE) + for (auto usr : pn->users.enumerate()) + if (!mc.already_changed_arcs[pn->udata][usr.index.idx()]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr.index)); + mc.already_changed_arcs[pn->udata][usr.index.idx()] = true; + } + } else if (port.second.type == PORT_IN) { + auto usr_idx = port.second.user_idx; + if (!mc.already_changed_arcs[pn->udata][usr_idx.idx()]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr_idx)); + mc.already_changed_arcs[pn->udata][usr_idx.idx()] = true; + } + } + } + } + } + + void compute_cost_changes(MoveChangeData &md) + { + for (const auto &bc : md.bounds_changed_nets_x) { + if (md.already_bounds_changed_x[bc] == MoveChangeData::FULL_RECOMPUTE) + md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]); + } + for (const auto &bc : md.bounds_changed_nets_y) { + if (md.already_bounds_changed_x[bc] != MoveChangeData::FULL_RECOMPUTE && + md.already_bounds_changed_y[bc] == MoveChangeData::FULL_RECOMPUTE) + md.new_net_bounds[bc] = get_net_bounds(net_by_udata[bc]); + } + + for (const auto &bc : md.bounds_changed_nets_x) + md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg); + for (const auto &bc : md.bounds_changed_nets_y) + if (md.already_bounds_changed_x[bc] == MoveChangeData::NO_CHANGE) + md.wirelen_delta += md.new_net_bounds[bc].hpwl(cfg) - net_bounds[bc].hpwl(cfg); + + if (cfg.timing_driven) { + for (const auto &tc : md.changed_arcs) { + double old_cost = net_arc_tcost.at(tc.first).at(tc.second.idx()); + double new_cost = + get_timing_cost(net_by_udata.at(tc.first), net_by_udata.at(tc.first)->users.at(tc.second)); + md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); + md.timing_delta += (new_cost - old_cost); + md.already_changed_arcs[tc.first][tc.second.idx()] = false; + } + } + } + + void commit_cost_changes(MoveChangeData &md) + { + for (const auto &bc : md.bounds_changed_nets_x) + net_bounds[bc] = md.new_net_bounds[bc]; + for (const auto &bc : md.bounds_changed_nets_y) + net_bounds[bc] = md.new_net_bounds[bc]; + for (const auto &tc : md.new_arc_costs) + net_arc_tcost[tc.first.first].at(tc.first.second.idx()) = tc.second; + curr_wirelen_cost += md.wirelen_delta; + curr_timing_cost += md.timing_delta; + } + + // Simple routeability driven placement + const int large_cell_thresh = 50; + int total_net_share = 0; + std::vector>> nets_by_tile; + void setup_nets_by_tile() + { + total_net_share = 0; + nets_by_tile.resize(max_x + 1, std::vector>(max_y + 1)); + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (int(ci->ports.size()) > large_cell_thresh) + continue; + Loc loc = ctx->getBelLocation(ci->bel); + auto &nbt = nets_by_tile.at(loc.x).at(loc.y); + for (const auto &port : ci->ports) { + if (port.second.net == nullptr) + continue; + if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel)) + continue; + int &s = nbt[port.second.net->name]; + if (s > 0) + ++total_net_share; + ++s; + } + } + } + + int update_nets_by_tile(CellInfo *ci, Loc old_loc, Loc new_loc) + { + if (int(ci->ports.size()) > large_cell_thresh) + return 0; + int loss = 0, gain = 0; + auto &nbt_old = nets_by_tile.at(old_loc.x).at(old_loc.y); + auto &nbt_new = nets_by_tile.at(new_loc.x).at(new_loc.y); + + for (const auto &port : ci->ports) { + if (port.second.net == nullptr) + continue; + if (port.second.net->driver.cell == nullptr || ctx->getBelGlobalBuf(port.second.net->driver.cell->bel)) + continue; + int &o = nbt_old[port.second.net->name]; + --o; + NPNR_ASSERT(o >= 0); + if (o > 0) + ++loss; + int &n = nbt_new[port.second.net->name]; + if (n > 0) + ++gain; + ++n; + } + int delta = gain - loss; + total_net_share += delta; + return delta; + } + + // Get the combined wirelen/timing metric + inline double curr_metric() + { + return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost - cfg.netShareWeight * total_net_share; + } + + // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds + std::vector net_bounds; + // Map net arcs to their timing cost (criticality * delay ns) + std::vector> net_arc_tcost; + + // Fast lookup for cell to clusters + dict> cluster2cell; + + // Wirelength and timing cost at last and current iteration + wirelen_t last_wirelen_cost, curr_wirelen_cost; + double last_timing_cost, curr_timing_cost; + + Context *ctx; + float temp = 10; + float crit_exp = 8; + float lambda = 0.5; + bool improved = false; + int n_move, n_accept; + int diameter = 35, max_x = 1, max_y = 1; + dict> bel_types; + dict region_bounds; + FastBels fast_bels; + pool locked_bels; + std::vector net_by_udata; + std::vector old_udata; + bool require_legal = true; + const int legalise_dia = 4; + Placer1Cfg cfg; + + TimingAnalyser tmg; +}; + +Placer1Cfg::Placer1Cfg(Context *ctx) +{ + constraintWeight = ctx->setting("placer1/constraintWeight", 10); + netShareWeight = ctx->setting("placer1/netShareWeight", 0); + minBelsForGridPick = ctx->setting("placer1/minBelsForGridPick", 64); + budgetBased = ctx->setting("placer1/budgetBased", false); + startTemp = ctx->setting("placer1/startTemp", 1); + timingFanoutThresh = std::numeric_limits::max(); + timing_driven = ctx->setting("timing_driven"); + slack_redist_iter = ctx->setting("slack_redist_iter"); + hpwl_scale_x = 1; + hpwl_scale_y = 1; +} + +bool placer1(Context *ctx, Placer1Cfg cfg) +{ + try { + SAPlacer placer(ctx, cfg); + placer.place(); + log_info("Checksum: 0x%08x\n", ctx->checksum()); +#ifndef NDEBUG + ctx->lock(); + ctx->check(); + ctx->unlock(); +#endif + return true; + } catch (log_execution_error_exception) { +#ifndef NDEBUG + ctx->lock(); + ctx->check(); + ctx->unlock(); +#endif + return false; + } +} + +bool placer1_refine(Context *ctx, Placer1Cfg cfg) +{ + try { + SAPlacer placer(ctx, cfg); + placer.place(true); + log_info("Checksum: 0x%08x\n", ctx->checksum()); +#ifndef NDEBUG + ctx->lock(); + ctx->check(); + ctx->unlock(); +#endif + return true; + } catch (log_execution_error_exception) { +#ifndef NDEBUG + ctx->lock(); + ctx->check(); + ctx->unlock(); +#endif + return false; + } +} + +NEXTPNR_NAMESPACE_END diff --git a/common/place/placer1.h b/common/place/placer1.h new file mode 100644 index 00000000..9dfb0b0d --- /dev/null +++ b/common/place/placer1.h @@ -0,0 +1,45 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ +#ifndef PLACE_H +#define PLACE_H + +#include "log.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct Placer1Cfg +{ + Placer1Cfg(Context *ctx); + float constraintWeight, netShareWeight; + int minBelsForGridPick; + bool budgetBased; + float startTemp; + int timingFanoutThresh; + bool timing_driven; + int slack_redist_iter; + int hpwl_scale_x, hpwl_scale_y; +}; + +extern bool placer1(Context *ctx, Placer1Cfg cfg); +extern bool placer1_refine(Context *ctx, Placer1Cfg cfg); + +NEXTPNR_NAMESPACE_END + +#endif // PLACE_H diff --git a/common/place/placer_heap.cc b/common/place/placer_heap.cc new file mode 100644 index 00000000..4c9ffb23 --- /dev/null +++ b/common/place/placer_heap.cc @@ -0,0 +1,1830 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2019 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * [[cite]] HeAP + * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + * [[cite]] SimPL + * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + * + * Notable changes from the original algorithm + * - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is + * added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation) + * as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by + * spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices. + * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) + */ + +#ifdef WITH_HEAP + +#include "placer_heap.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fast_bels.h" +#include "log.h" +#include "nextpnr.h" +#include "parallel_refine.h" +#include "place_common.h" +#include "placer1.h" +#include "scope_lock.h" +#include "timing.h" +#include "util.h" + +NEXTPNR_NAMESPACE_BEGIN + +namespace { +// A simple internal representation for a sparse system of equations Ax = rhs +// This is designed to decouple the functions that build the matrix to the engine that +// solves it, and the representation that requires +template struct EquationSystem +{ + + EquationSystem(size_t rows, size_t cols) + { + A.resize(cols); + rhs.resize(rows); + } + + // Simple sparse format, easy to convert to CCS for solver + std::vector>> A; // col -> (row, x[row, col]) sorted by row + std::vector rhs; // RHS vector + void reset() + { + for (auto &col : A) + col.clear(); + std::fill(rhs.begin(), rhs.end(), T()); + } + + void add_coeff(int row, int col, T val) + { + auto &Ac = A.at(col); + // Binary search + int b = 0, e = int(Ac.size()) - 1; + while (b <= e) { + int i = (b + e) / 2; + if (Ac.at(i).first == row) { + Ac.at(i).second += val; + return; + } + if (Ac.at(i).first > row) + e = i - 1; + else + b = i + 1; + } + Ac.insert(Ac.begin() + b, std::make_pair(row, val)); + } + + void add_rhs(int row, T val) { rhs[row] += val; } + + void solve(std::vector &x, float tolerance) + { + using namespace Eigen; + if (x.empty()) + return; + NPNR_ASSERT(x.size() == A.size()); + + VectorXd vx(x.size()), vb(rhs.size()); + SparseMatrix mat(A.size(), A.size()); + + std::vector colnnz; + for (auto &Ac : A) + colnnz.push_back(int(Ac.size())); + mat.reserve(colnnz); + for (int col = 0; col < int(A.size()); col++) { + auto &Ac = A.at(col); + for (auto &el : Ac) + mat.insert(el.first, col) = el.second; + } + + for (int i = 0; i < int(x.size()); i++) + vx[i] = x.at(i); + for (int i = 0; i < int(rhs.size()); i++) + vb[i] = rhs.at(i); + + ConjugateGradient, Lower | Upper> solver; + solver.setTolerance(tolerance); + VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx); + for (int i = 0; i < int(x.size()); i++) + x.at(i) = xr[i]; + // for (int i = 0; i < int(x.size()); i++) + // log_info("x[%d] = %f\n", i, x.at(i)); + } +}; + +} // namespace + +class HeAPPlacer +{ + public: + HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) + : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx) + { + Eigen::initParallel(); + tmg.setup_only = true; + tmg.setup(); + + for (auto &cell : ctx->cells) + if (cell.second->cluster != ClusterId()) + cluster2cells[cell.second->cluster].push_back(cell.second.get()); + } + + bool place() + { + auto startt = std::chrono::high_resolution_clock::now(); + + ScopeLock lock(ctx); + place_constraints(); + build_fast_bels(); + seed_placement(); + update_all_chains(); + wirelen_t hpwl = total_hpwl(); + log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n", + int(place_cells.size()), int(hpwl)); + for (int i = 0; i < 4; i++) { + setup_solve_cells(); + auto solve_startt = std::chrono::high_resolution_clock::now(); +#ifdef NPNR_DISABLE_THREADS + build_solve_direction(false, -1); + build_solve_direction(true, -1); +#else + boost::thread xaxis([&]() { build_solve_direction(false, -1); }); + build_solve_direction(true, -1); + xaxis.join(); +#endif + auto solve_endt = std::chrono::high_resolution_clock::now(); + solve_time += std::chrono::duration(solve_endt - solve_startt).count(); + + update_all_chains(); + + hpwl = total_hpwl(); + log_info(" at initial placer iter %d, wirelen = %d\n", i, int(hpwl)); + } + + wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); + int iter = 0, stalled = 0; + + std::vector> solution; + + std::vector> heap_runs; + pool all_buckets; + dict bucket_count; + + for (auto cell : place_cells) { + BelBucketId bucket = ctx->getBelBucketForCellType(cell->type); + if (!all_buckets.count(bucket)) { + heap_runs.push_back(pool{bucket}); + all_buckets.insert(bucket); + } + bucket_count[bucket]++; + } + // If more than 98% of cells are one cell type, always solve all at once + // Otherwise, follow full HeAP strategy of rotate&all + for (auto &c : bucket_count) { + if (c.second >= 0.98 * int(place_cells.size())) { + heap_runs.clear(); + break; + } + } + + if (cfg.placeAllAtOnce) { + // Never want to deal with LUTs, FFs, MUXFxs separately, + // for now disable all single-cell-type runs and only have heterogeneous + // runs + heap_runs.clear(); + } + + heap_runs.push_back(all_buckets); + // The main HeAP placer loop + log_info("Running main analytical placer.\n"); + while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) { + // Alternate between particular bel types and all bels + for (auto &run : heap_runs) { + auto run_startt = std::chrono::high_resolution_clock::now(); + + setup_solve_cells(&run); + if (solve_cells.empty()) + continue; + // Heuristic: don't bother with threading below a certain size + auto solve_startt = std::chrono::high_resolution_clock::now(); + + // Build the connectivity matrix and run the solver; multithreaded between x and y axes if applicable +#ifndef NPNR_DISABLE_THREADS + if (solve_cells.size() >= 500) { + boost::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); }); + build_solve_direction(true, (iter == 0) ? -1 : iter); + xaxis.join(); + } else +#endif + { + build_solve_direction(false, (iter == 0) ? -1 : iter); + build_solve_direction(true, (iter == 0) ? -1 : iter); + } + auto solve_endt = std::chrono::high_resolution_clock::now(); + solve_time += std::chrono::duration(solve_endt - solve_startt).count(); + update_all_chains(); + solved_hpwl = total_hpwl(); + + update_all_chains(); + + // Run the spreader + for (const auto &group : cfg.cellGroups) + CutSpreader(this, group).run(); + + for (auto type : run) + if (std::all_of(cfg.cellGroups.begin(), cfg.cellGroups.end(), + [type](const pool &grp) { return !grp.count(type); })) + CutSpreader(this, {type}).run(); + + // Run strict legalisation to find a valid bel for all cells + update_all_chains(); + spread_hpwl = total_hpwl(); + legalise_placement_strict(true); + update_all_chains(); + + legal_hpwl = total_hpwl(); + auto run_stopt = std::chrono::high_resolution_clock::now(); + + IdString bucket_name = ctx->getBelBucketName(*run.begin()); + log_info(" at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n", + iter + 1, (run.size() > 1 ? "ALL" : bucket_name.c_str(ctx)), int(solved_hpwl), + int(spread_hpwl), int(legal_hpwl), + std::chrono::duration(run_stopt - run_startt).count()); + } + + // Update timing weights + if (cfg.timing_driven) + tmg.run(); + + if (legal_hpwl < best_hpwl) { + best_hpwl = legal_hpwl; + stalled = 0; + // Save solution + solution.clear(); + for (auto &cell : ctx->cells) { + solution.emplace_back(cell.second.get(), cell.second->bel, cell.second->belStrength); + } + } else { + ++stalled; + } + for (auto &cl : cell_locs) { + cl.second.legal_x = cl.second.x; + cl.second.legal_y = cl.second.y; + } + ctx->yield(); + ++iter; + } + + // Apply saved solution + for (auto &sc : solution) { + CellInfo *cell = std::get<0>(sc); + if (cell->bel != BelId()) + ctx->unbindBel(cell->bel); + } + for (auto &sc : solution) { + CellInfo *cell; + BelId bel; + PlaceStrength strength; + std::tie(cell, bel, strength) = sc; + ctx->bindBel(bel, cell, strength); + } + + for (auto &cell : ctx->cells) { + if (cell.second->bel == BelId()) + log_error("Found unbound cell %s\n", cell.first.c_str(ctx)); + if (ctx->getBoundBelCell(cell.second->bel) != cell.second.get()) + log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx)); + if (ctx->debug) + log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->nameOfBel(cell.second->bel)); + } + + bool any_bad_placements = false; + for (auto bel : ctx->getBels()) { + CellInfo *cell = ctx->getBoundBelCell(bel); + if (!ctx->isBelLocationValid(bel)) { + std::string cell_text = "no cell"; + if (cell != nullptr) + cell_text = std::string("cell '") + ctx->nameOf(cell) + "'"; + log_warning("post-placement validity check failed for Bel '%s' " + "(%s)\n", + ctx->nameOfBel(bel), cell_text.c_str()); + any_bad_placements = true; + } + } + + if (any_bad_placements) { + return false; + } + + auto endtt = std::chrono::high_resolution_clock::now(); + log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); + log_info(" of which solving equations: %.02fs\n", solve_time); + log_info(" of which spreading cells: %.02fs\n", cl_time); + log_info(" of which strict legalisation: %.02fs\n", sl_time); + + ctx->check(); + lock.unlock_early(); + +#if !defined(__wasm) + if (cfg.parallelRefine) { + if (!parallel_refine(ctx, ParallelRefineCfg(ctx))) { + return false; + } + } else +#endif + { + if (!placer1_refine(ctx, Placer1Cfg(ctx))) { + return false; + } + } + + return true; + } + + private: + Context *ctx; + PlacerHeapCfg cfg; + + int max_x = 0, max_y = 0; + FastBels fast_bels; + dict> bel_types; + + TimingAnalyser tmg; + + struct BoundingBox + { + // Actual bounding box + int x0 = 0, x1 = 0, y0 = 0, y1 = 0; + }; + + dict constraint_region_bounds; + + // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom + // structure instead + struct CellLocation + { + int x, y; + int legal_x, legal_y; + double rawx, rawy; + bool locked, global; + }; + dict cell_locs; + // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains + // (only the root of each macro is placed.) + std::vector place_cells; + + // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place + // cells of a certain type) + std::vector solve_cells; + + dict> cluster2cells; + dict chain_size; + // Performance counting + double solve_time = 0, cl_time = 0, sl_time = 0; + + // Place cells with the BEL attribute set to constrain them + void place_constraints() + { + size_t placed_cells = 0; + // Initial constraints placer + for (auto &cell_entry : ctx->cells) { + CellInfo *cell = cell_entry.second.get(); + + auto loc = cell->attrs.find(ctx->id("BEL")); + if (loc != cell->attrs.end()) { + std::string loc_name = loc->second.as_string(); + BelId bel = ctx->getBelByNameStr(loc_name); + if (bel == BelId()) { + log_error("No Bel named \'%s\' located for " + "this chip (processing BEL attribute on \'%s\')\n", + loc_name.c_str(), cell->name.c_str(ctx)); + } + + if (!ctx->isValidBelForCellType(cell->type, bel)) { + IdString bel_type = ctx->getBelType(bel); + log_error("Bel \'%s\' of type \'%s\' does not match cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + auto bound_cell = ctx->getBoundBelCell(bel); + if (bound_cell) { + log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", + cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); + } + + ctx->bindBel(bel, cell, STRENGTH_USER); + if (!ctx->isBelLocationValid(bel)) { + IdString bel_type = ctx->getBelType(bel); + log_error("Bel \'%s\' of type \'%s\' is not valid for cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + placed_cells++; + } + } + log_info("Placed %d cells based on constraints.\n", int(placed_cells)); + ctx->yield(); + } + + void build_fast_bels() + { + for (auto bel : ctx->getBels()) { + if (!ctx->checkBelAvail(bel)) + continue; + Loc loc = ctx->getBelLocation(bel); + max_x = std::max(max_x, loc.x); + max_y = std::max(max_y, loc.y); + } + + pool cell_types_in_use; + pool buckets_in_use; + for (auto &cell : ctx->cells) { + IdString cell_type = cell.second->type; + cell_types_in_use.insert(cell_type); + BelBucketId bucket = ctx->getBelBucketForCellType(cell_type); + buckets_in_use.insert(bucket); + } + + for (auto cell_type : cell_types_in_use) { + fast_bels.addCellType(cell_type); + } + for (auto bucket : buckets_in_use) { + fast_bels.addBelBucket(bucket); + } + + // Determine bounding boxes of region constraints + for (auto ®ion : ctx->region) { + Region *r = region.second.get(); + BoundingBox bb; + if (r->constr_bels) { + bb.x0 = std::numeric_limits::max(); + bb.x1 = std::numeric_limits::min(); + bb.y0 = std::numeric_limits::max(); + bb.y1 = std::numeric_limits::min(); + for (auto bel : r->bels) { + Loc loc = ctx->getBelLocation(bel); + bb.x0 = std::min(bb.x0, loc.x); + bb.x1 = std::max(bb.x1, loc.x); + bb.y0 = std::min(bb.y0, loc.y); + bb.y1 = std::max(bb.y1, loc.y); + } + } else { + bb.x0 = 0; + bb.y0 = 0; + bb.x1 = max_x; + bb.y1 = max_y; + } + constraint_region_bounds[r->name] = bb; + } + } + + // Build and solve in one direction + void build_solve_direction(bool yaxis, int iter) + { + for (int i = 0; i < 5; i++) { + EquationSystem esx(solve_cells.size(), solve_cells.size()); + build_equations(esx, yaxis, iter); + solve_equations(esx, yaxis); + } + } + + // Check if a cell has any meaningful connectivity + bool has_connectivity(CellInfo *cell) + { + for (auto port : cell->ports) { + if (port.second.net != nullptr && port.second.net->driver.cell != nullptr && + !port.second.net->users.empty()) + return true; + } + return false; + } + + // Build up a random initial placement, without regard to legality + // FIXME: Are there better approaches to the initial placement (e.g. greedy?) + void seed_placement() + { + pool cell_types; + for (const auto &cell : ctx->cells) { + cell_types.insert(cell.second->type); + } + + pool bels_used; + dict> available_bels; + + for (auto bel : ctx->getBels()) { + if (!ctx->checkBelAvail(bel)) { + continue; + } + + for (auto cell_type : cell_types) { + if (ctx->isValidBelForCellType(cell_type, bel)) { + available_bels[cell_type].push_back(bel); + } + } + } + + for (auto &t : available_bels) { + ctx->shuffle(t.second.begin(), t.second.end()); + } + + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->bel != BelId()) { + Loc loc = ctx->getBelLocation(ci->bel); + cell_locs[cell.first].x = loc.x; + cell_locs[cell.first].y = loc.y; + cell_locs[cell.first].locked = true; + cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); + } else if (ci->cluster == ClusterId() || ctx->getClusterRootCell(ci->cluster) == ci) { + bool placed = false; + int attempt_count = 0; + while (!placed) { + ++attempt_count; + if (attempt_count > 25000) { + log_error("Unable to find a placement location for cell '%s'\n", ci->name.c_str(ctx)); + } + + // Make sure this cell type is in the available BEL map at + // all. + if (!available_bels.count(ci->type)) { + log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n", + ci->name.c_str(ctx), ci->type.c_str(ctx)); + } + + // Find an unused BEL from bels_for_cell_type. + auto &bels_for_cell_type = available_bels.at(ci->type); + BelId bel; + while (true) { + if (bels_for_cell_type.empty()) { + log_error("Unable to place cell '%s', no BELs remaining to implement cell type '%s'\n", + ci->name.c_str(ctx), ci->type.c_str(ctx)); + } + + BelId candidate_bel = bels_for_cell_type.back(); + bels_for_cell_type.pop_back(); + if (bels_used.count(candidate_bel)) { + // candidate_bel has already been used by another + // cell type, skip it. + continue; + } + + bel = candidate_bel; + break; + } + + Loc loc = ctx->getBelLocation(bel); + cell_locs[cell.first].x = loc.x; + cell_locs[cell.first].y = loc.y; + cell_locs[cell.first].locked = false; + cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); + + // FIXME + if (has_connectivity(cell.second.get()) && !cfg.ioBufTypes.count(ci->type)) { + bels_used.insert(bel); + place_cells.push_back(ci); + placed = true; + } else { + ctx->bindBel(bel, ci, STRENGTH_STRONG); + if (ctx->isBelLocationValid(bel)) { + cell_locs[cell.first].locked = true; + placed = true; + bels_used.insert(bel); + } else { + ctx->unbindBel(bel); + available_bels.at(ci->type).push_front(bel); + } + } + } + } + } + } + + // Setup the cells to be solved, returns the number of rows + int setup_solve_cells(pool *buckets = nullptr) + { + int row = 0; + solve_cells.clear(); + // First clear the udata of all cells + for (auto &cell : ctx->cells) + cell.second->udata = dont_solve; + // Then update cells to be placed, which excludes cell children + for (auto cell : place_cells) { + if (buckets && !buckets->count(ctx->getBelBucketForCellType(cell->type))) + continue; + cell->udata = row++; + solve_cells.push_back(cell); + } + // Finally, update the udata of children + for (auto &cluster : cluster2cells) + for (auto child : cluster.second) + child->udata = ctx->getClusterRootCell(cluster.first)->udata; + return row; + } + + // Update all chains + void update_all_chains() + { + for (auto cell : place_cells) { + chain_size[cell->name] = 1; + if (cell->cluster != ClusterId()) { + const auto base = cell_locs[cell->name]; + for (auto child : cluster2cells.at(cell->cluster)) { + if (child->type == cell->type && child != cell) + chain_size[cell->name]++; + Loc offset = ctx->getClusterOffset(child); + cell_locs[child->name].x = std::max(0, std::min(max_x, base.x + offset.x)); + cell_locs[child->name].y = std::max(0, std::min(max_y, base.y + offset.y)); + } + } + } + } + + // Run a function on all ports of a net - including the driver and all users + template void foreach_port(NetInfo *net, Tf func) + { + if (net->driver.cell != nullptr) + func(net->driver, store_index()); + for (auto usr : net->users.enumerate()) + func(usr.value, usr.index); + } + + // Build the system of equations for either X or Y + void build_equations(EquationSystem &es, bool yaxis, int iter = -1) + { + // Return the x or y position of a cell, depending on ydir + auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; + auto legal_pos = [&](CellInfo *cell) { + return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; + }; + + es.reset(); + + for (auto &net : ctx->nets) { + NetInfo *ni = net.second.get(); + if (ni->driver.cell == nullptr) + continue; + if (ni->users.empty()) + continue; + if (cell_locs.at(ni->driver.cell->name).global) + continue; + // Find the bounds of the net in this axis, and the ports that correspond to these bounds + PortRef *lbport = nullptr, *ubport = nullptr; + int lbpos = std::numeric_limits::max(), ubpos = std::numeric_limits::min(); + foreach_port(ni, [&](PortRef &port, store_index user_idx) { + int pos = cell_pos(port.cell); + if (pos < lbpos) { + lbpos = pos; + lbport = &port; + } + if (pos > ubpos) { + ubpos = pos; + ubport = &port; + } + }); + NPNR_ASSERT(lbport != nullptr); + NPNR_ASSERT(ubport != nullptr); + + auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) { + if (eqn.cell->udata == dont_solve) + return; + int row = eqn.cell->udata; + int v_pos = cell_pos(var.cell); + if (var.cell->udata != dont_solve) { + es.add_coeff(row, var.cell->udata, weight); + } else { + es.add_rhs(row, -v_pos * weight); + } + if (var.cell->cluster != ClusterId()) { + Loc offset = ctx->getClusterOffset(var.cell); + es.add_rhs(row, -(yaxis ? offset.y : offset.x) * weight); + } + }; + + // Add all relevant connections to the matrix + foreach_port(ni, [&](PortRef &port, store_index user_idx) { + int this_pos = cell_pos(port.cell); + auto process_arc = [&](PortRef *other) { + if (other == &port) + return; + int o_pos = cell_pos(other->cell); + double weight = 1.0 / (ni->users.entries() * + std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * + std::abs(o_pos - this_pos))); + + if (user_idx) { + weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)), + cfg.criticalityExponent)); + } + + // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, + // if the other end isn't fixed + stamp_equation(port, port, weight); + stamp_equation(port, *other, -weight); + stamp_equation(*other, *other, weight); + stamp_equation(*other, port, -weight); + }; + process_arc(lbport); + process_arc(ubport); + }); + } + if (iter != -1) { + float alpha = cfg.alpha; + for (size_t row = 0; row < solve_cells.size(); row++) { + int l_pos = legal_pos(solve_cells.at(row)); + int c_pos = cell_pos(solve_cells.at(row)); + + double weight = + alpha * iter / + std::max(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(l_pos - c_pos)); + // Add an arc from legalised to current position + es.add_coeff(row, row, weight); + es.add_rhs(row, weight * l_pos); + } + } + } + + // Build the system of equations for either X or Y + void solve_equations(EquationSystem &es, bool yaxis) + { + // Return the x or y position of a cell, depending on ydir + auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; + std::vector vals; + std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); + es.solve(vals, cfg.solverTolerance); + for (size_t i = 0; i < vals.size(); i++) + if (yaxis) { + cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); + cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i)))); + if (solve_cells.at(i)->region != nullptr) + cell_locs.at(solve_cells.at(i)->name).y = + limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).y, true); + } else { + cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); + cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); + if (solve_cells.at(i)->region != nullptr) + cell_locs.at(solve_cells.at(i)->name).x = + limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).x, false); + } + } + + // Compute HPWL + wirelen_t total_hpwl() + { + wirelen_t hpwl = 0; + for (auto &net : ctx->nets) { + NetInfo *ni = net.second.get(); + if (ni->driver.cell == nullptr) + continue; + CellLocation &drvloc = cell_locs.at(ni->driver.cell->name); + if (drvloc.global) + continue; + int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y; + for (auto &user : ni->users) { + CellLocation &usrloc = cell_locs.at(user.cell->name); + xmin = std::min(xmin, usrloc.x); + xmax = std::max(xmax, usrloc.x); + ymin = std::min(ymin, usrloc.y); + ymax = std::max(ymax, usrloc.y); + } + hpwl += cfg.hpwl_scale_x * (xmax - xmin) + cfg.hpwl_scale_y * (ymax - ymin); + } + return hpwl; + } + + // Strict placement legalisation, performed after the initial HeAP spreading + void legalise_placement_strict(bool require_validity = false) + { + auto startt = std::chrono::high_resolution_clock::now(); + + // Unbind all cells placed in this solution + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->bel != BelId() && + (ci->udata != dont_solve || + (ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve))) + ctx->unbindBel(ci->bel); + } + + // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using + // the simple greedy largest-macro-first approach. + std::priority_queue> remaining; + for (auto cell : solve_cells) { + remaining.emplace(chain_size[cell->name], cell->name); + } + int ripup_radius = 2; + int total_iters = 0; + int total_iters_noreset = 0; + while (!remaining.empty()) { + auto top = remaining.top(); + remaining.pop(); + + CellInfo *ci = ctx->cells.at(top.second).get(); + // Was now placed, ignore + if (ci->bel != BelId()) + continue; + // log_info(" Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx)); + FastBels::FastBelsData *fb; + fast_bels.getBelsForCellType(ci->type, &fb); + int radius = 0; + int iter = 0; + int iter_at_radius = 0; + bool placed = false; + BelId bestBel; + int best_inp_len = std::numeric_limits::max(); + + total_iters++; + total_iters_noreset++; + if (total_iters > int(solve_cells.size())) { + total_iters = 0; + ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2); + } + + if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) { + log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n"); + } + + while (!placed) { + + // Set a conservative timeout + if (iter > std::max(10000, 3 * int(ctx->cells.size()))) + log_error("Unable to find legal placement for cell '%s', check constraints and utilisation.\n", + ctx->nameOf(ci)); + + // Determine a search radius around the solver location (which increases over time) that is clamped to + // the region constraint for the cell (if applicable) + int rx = radius, ry = radius; + + if (ci->region != nullptr) { + rx = std::min(radius, (constraint_region_bounds[ci->region->name].x1 - + constraint_region_bounds[ci->region->name].x0) / + 2 + + 1); + ry = std::min(radius, (constraint_region_bounds[ci->region->name].y1 - + constraint_region_bounds[ci->region->name].y0) / + 2 + + 1); + } + + // Pick a random X and Y location within our search radius + int nx = ctx->rng(2 * rx + 1) + std::max(cell_locs.at(ci->name).x - rx, 0); + int ny = ctx->rng(2 * ry + 1) + std::max(cell_locs.at(ci->name).y - ry, 0); + + iter++; + iter_at_radius++; + if (iter >= (10 * (radius + 1))) { + // No luck yet, increase radius + radius = std::min(std::max(max_x, max_y), radius + 1); + while (radius < std::max(max_x, max_y)) { + // Keep increasing the radius until it will actually increase the number of cells we are + // checking (e.g. BRAM and DSP will not be in all cols/rows), so we don't waste effort + for (int x = std::max(0, cell_locs.at(ci->name).x - radius); + x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) { + if (x >= int(fb->size())) + break; + for (int y = std::max(0, cell_locs.at(ci->name).y - radius); + y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) { + if (y >= int(fb->at(x).size())) + break; + if (fb->at(x).at(y).size() > 0) + goto notempty; + } + } + radius = std::min(std::max(max_x, max_y), radius + 1); + } + notempty: + iter_at_radius = 0; + iter = 0; + } + // If our randomly chosen cooridnate is out of bounds; or points to a tile with no relevant bels; ignore + // it + if (nx < 0 || nx > max_x) + continue; + if (ny < 0 || ny > max_y) + continue; + + if (nx >= int(fb->size())) + continue; + if (ny >= int(fb->at(nx).size())) + continue; + if (fb->at(nx).at(ny).empty()) + continue; + + // The number of attempts to find a location to try + int need_to_explore = 2 * radius; + + // If we have found at least one legal location; and made enough attempts; assume it's good enough and + // finish + if (iter_at_radius >= need_to_explore && bestBel != BelId()) { + CellInfo *bound = ctx->getBoundBelCell(bestBel); + if (bound != nullptr) { + ctx->unbindBel(bound->bel); + remaining.emplace(chain_size[bound->name], bound->name); + } + ctx->bindBel(bestBel, ci, STRENGTH_WEAK); + placed = true; + Loc loc = ctx->getBelLocation(bestBel); + cell_locs[ci->name].x = loc.x; + cell_locs[ci->name].y = loc.y; + break; + } + + if (ci->cluster == ClusterId()) { + // The case where we have no relative constraints + for (auto sz : fb->at(nx).at(ny)) { + // Look through all bels in this tile; checking region constraint if applicable + if (!ci->testRegion(sz)) + continue; + // Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets) + // or occasionally trigger a tiebreaker + if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) { + CellInfo *bound = ctx->getBoundBelCell(sz); + if (bound != nullptr) { + // Only rip up cells without constraints + if (bound->cluster != ClusterId()) + continue; + ctx->unbindBel(bound->bel); + } + // Provisionally bind the bel + ctx->bindBel(sz, ci, STRENGTH_WEAK); + if (require_validity && !ctx->isBelLocationValid(sz)) { + // New location is not legal; unbind the cell (and rebind the cell we ripped up if + // applicable) + ctx->unbindBel(sz); + if (bound != nullptr) + ctx->bindBel(sz, bound, STRENGTH_WEAK); + } else if (iter_at_radius < need_to_explore) { + // It's legal, but we haven't tried enough locations yet + ctx->unbindBel(sz); + if (bound != nullptr) + ctx->bindBel(sz, bound, STRENGTH_WEAK); + int input_len = 0; + // Compute a fast input wirelength metric at this bel; and save if better than our last + // try + for (auto &port : ci->ports) { + auto &p = port.second; + if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr) + continue; + CellInfo *drv = p.net->driver.cell; + auto drv_loc = cell_locs.find(drv->name); + if (drv_loc == cell_locs.end()) + continue; + if (drv_loc->second.global) + continue; + input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny); + } + if (input_len < best_inp_len) { + best_inp_len = input_len; + bestBel = sz; + } + break; + } else { + // It's legal, and we've tried enough. Finish. + if (bound != nullptr) + remaining.emplace(chain_size[bound->name], bound->name); + Loc loc = ctx->getBelLocation(sz); + cell_locs[ci->name].x = loc.x; + cell_locs[ci->name].y = loc.y; + placed = true; + break; + } + } + } + } else { + // We do have relative constraints + for (auto sz : fb->at(nx).at(ny)) { + // List of cells and their destination + std::vector> targets; + // List of bels we placed things at; and the cell that was there before if applicable + std::vector> swaps_made; + + if (!ctx->getClusterPlacement(ci->cluster, sz, targets)) + continue; + + for (auto &target : targets) { + // Check it satisfies the region constraint if applicable + if (!target.first->testRegion(target.second)) + goto fail; + CellInfo *bound = ctx->getBoundBelCell(target.second); + // Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain + if (bound != nullptr) + if (bound->cluster != ClusterId() || bound->belStrength > STRENGTH_WEAK) + goto fail; + } + // Actually perform the move; keeping track of the moves we make so we can revert them if needed + for (auto &target : targets) { + CellInfo *bound = ctx->getBoundBelCell(target.second); + if (bound != nullptr) + ctx->unbindBel(target.second); + ctx->bindBel(target.second, target.first, STRENGTH_STRONG); + swaps_made.emplace_back(target.second, bound); + } + // Check that the move we have made is legal + for (auto &sm : swaps_made) { + if (!ctx->isBelLocationValid(sm.first)) + goto fail; + } + + if (false) { + fail: + // If the move turned out to be illegal; revert all the moves we made + for (auto &swap : swaps_made) { + ctx->unbindBel(swap.first); + if (swap.second != nullptr) + ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK); + } + continue; + } + for (auto &target : targets) { + Loc loc = ctx->getBelLocation(target.second); + cell_locs[target.first->name].x = loc.x; + cell_locs[target.first->name].y = loc.y; + // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z); + } + for (auto &swap : swaps_made) { + // Where we have ripped up cells; add them to the queue + if (swap.second != nullptr) + remaining.emplace(chain_size[swap.second->name], swap.second->name); + } + + placed = true; + break; + } + } + } + } + auto endt = std::chrono::high_resolution_clock::now(); + sl_time += std::chrono::duration(endt - startt).count(); + } + // Implementation of the cut-based spreading as described in the HeAP/SimPL papers + + template T limit_to_reg(Region *reg, T val, bool dir) + { + if (reg == nullptr) + return val; + int limit_low = dir ? constraint_region_bounds[reg->name].y0 : constraint_region_bounds[reg->name].x0; + int limit_high = dir ? constraint_region_bounds[reg->name].y1 : constraint_region_bounds[reg->name].x1; + return std::max(std::min(val, limit_high), limit_low); + } + + struct ChainExtent + { + int x0, y0, x1, y1; + }; + + struct SpreaderRegion + { + int id; + int x0, y0, x1, y1; + std::vector cells, bels; + bool overused(float beta) const + { + for (size_t t = 0; t < cells.size(); t++) { + if (bels.at(t) < 4) { + if (cells.at(t) > bels.at(t)) + return true; + } else { + if (cells.at(t) > beta * bels.at(t)) + return true; + } + } + return false; + } + }; + + class CutSpreader + { + public: + CutSpreader(HeAPPlacer *p, const pool &buckets) : p(p), ctx(p->ctx), buckets(buckets) + { + // Get fast BELs data for all buckets being Cut/Spread. + size_t idx = 0; + for (BelBucketId bucket : buckets) { + type_index[bucket] = idx; + FastBels::FastBelsData *fast_bels; + p->fast_bels.getBelsForBelBucket(bucket, &fast_bels); + fb.push_back(fast_bels); + ++idx; + NPNR_ASSERT(fb.size() == idx); + } + } + static int seq; + void run() + { + auto startt = std::chrono::high_resolution_clock::now(); + init(); + find_overused_regions(); + for (auto &r : regions) { + if (merged_regions.count(r.id)) + continue; +#if 0 + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); +#endif + } + expand_regions(); + std::queue> workqueue; +#if 0 + std::vector> orig; + if (ctx->debug) + for (auto c : p->solve_cells) + orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy); +#endif + for (auto &r : regions) { + if (merged_regions.count(r.id)) + continue; +#if 0 + for (auto t : sorted(beltype)) { + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", t.c_str(ctx), r.x0, r.y0, r.x1, r.y1, + r.cells.at(type_index.at(t)), r.bels.at(type_index.at(t))); + } + +#endif + workqueue.emplace(r.id, false); + } + while (!workqueue.empty()) { + auto front = workqueue.front(); + workqueue.pop(); + auto &r = regions.at(front.first); + if (std::all_of(r.cells.begin(), r.cells.end(), [](int x) { return x == 0; })) + continue; + auto res = cut_region(r, front.second); + if (res) { + workqueue.emplace(res->first, !front.second); + workqueue.emplace(res->second, !front.second); + } else { + // Try the other dir, in case stuck in one direction only + auto res2 = cut_region(r, !front.second); + if (res2) { + workqueue.emplace(res2->first, front.second); + workqueue.emplace(res2->second, front.second); + } + } + } +#if 0 + if (ctx->debug) { + std::ofstream sp("spread" + std::to_string(seq) + ".csv"); + for (size_t i = 0; i < p->solve_cells.size(); i++) { + auto &c = p->solve_cells.at(i); + if (c->type != beltype) + continue; + sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl; + } + std::ofstream oc("cells" + std::to_string(seq) + ".csv"); + for (size_t y = 0; y <= p->max_y; y++) { + for (size_t x = 0; x <= p->max_x; x++) { + oc << cells_at_location.at(x).at(y).size() << ", "; + } + oc << std::endl; + } + ++seq; + } +#endif + auto endt = std::chrono::high_resolution_clock::now(); + p->cl_time += std::chrono::duration(endt - startt).count(); + } + + private: + HeAPPlacer *p; + Context *ctx; + pool buckets; + dict type_index; + std::vector>> occupancy; + std::vector> groups; + std::vector> chaines; + std::map cell_extents; + + std::vector>> *> fb; + + std::vector regions; + pool merged_regions; + // Cells at a location, sorted by real (not integer) x and y + std::vector>> cells_at_location; + + int occ_at(int x, int y, int type) { return occupancy.at(x).at(y).at(type); } + + int bels_at(int x, int y, int type) + { + if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size())) + return 0; + return int(fb.at(type)->at(x).at(y).size()); + } + + bool is_cell_fixed(const CellInfo &cell) const + { + return buckets.count(ctx->getBelBucketForCellType(cell.type)) == 0; + } + + size_t cell_index(const CellInfo &cell) const { return type_index.at(ctx->getBelBucketForCellType(cell.type)); } + + void init() + { + occupancy.resize(p->max_x + 1, + std::vector>(p->max_y + 1, std::vector(buckets.size(), 0))); + groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); + chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); + cells_at_location.resize(p->max_x + 1, std::vector>(p->max_y + 1)); + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + for (int t = 0; t < int(buckets.size()); t++) { + occupancy.at(x).at(y).at(t) = 0; + } + groups.at(x).at(y) = -1; + chaines.at(x).at(y) = {x, y, x, y}; + } + + auto set_chain_ext = [&](IdString cell, int x, int y) { + if (!cell_extents.count(cell)) + cell_extents[cell] = {x, y, x, y}; + else { + cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x); + cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y); + cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x); + cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y); + } + }; + + for (auto &cell_loc : p->cell_locs) { + IdString cell_name = cell_loc.first; + const CellInfo &cell = *ctx->cells.at(cell_name); + const CellLocation &loc = cell_loc.second; + if (is_cell_fixed(cell)) { + continue; + } + + if (cell.belStrength > STRENGTH_STRONG) { + continue; + } + + occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++; + + // Compute ultimate extent of each chain root + if (cell.cluster != ClusterId()) { + set_chain_ext(ctx->getClusterRootCell(cell.cluster)->name, loc.x, loc.y); + } + } + + for (auto &cell_loc : p->cell_locs) { + IdString cell_name = cell_loc.first; + const CellInfo &cell = *ctx->cells.at(cell_name); + const CellLocation &loc = cell_loc.second; + if (is_cell_fixed(cell)) { + continue; + } + + if (cell.belStrength > STRENGTH_STRONG) { + continue; + } + + // Transfer chain extents to the actual chains structure + ChainExtent *ce = nullptr; + if (cell.cluster != ClusterId()) { + ce = &(cell_extents.at(ctx->getClusterRootCell(cell.cluster)->name)); + } + + if (ce) { + auto &lce = chaines.at(loc.x).at(loc.y); + lce.x0 = std::min(lce.x0, ce->x0); + lce.y0 = std::min(lce.y0, ce->y0); + lce.x1 = std::max(lce.x1, ce->x1); + lce.y1 = std::max(lce.y1, ce->y1); + } + } + + for (auto cell : p->solve_cells) { + if (is_cell_fixed(*cell)) { + continue; + } + + cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); + } + } + + void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee) + { + // Prevent grow_region from recursing while doing this + for (int x = mergee.x0; x <= mergee.x1; x++) { + for (int y = mergee.y0; y <= mergee.y1; y++) { + // log_info("%d %d\n", groups.at(x).at(y), mergee.id); + NPNR_ASSERT(groups.at(x).at(y) == mergee.id); + groups.at(x).at(y) = merged.id; + for (size_t t = 0; t < buckets.size(); t++) { + merged.cells.at(t) += occ_at(x, y, t); + merged.bels.at(t) += bels_at(x, y, t); + } + } + } + + merged_regions.insert(mergee.id); + grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); + } + + void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false) + { + // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); + if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) + return; + int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1; + r.x0 = std::min(r.x0, x0); + r.y0 = std::min(r.y0, y0); + r.x1 = std::max(r.x1, x1); + r.y1 = std::max(r.y1, y1); + + auto process_location = [&](int x, int y) { + // Merge with any overlapping regions + if (groups.at(x).at(y) == -1) { + for (size_t t = 0; t < buckets.size(); t++) { + r.bels.at(t) += bels_at(x, y, t); + r.cells.at(t) += occ_at(x, y, t); + } + } + + if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) + merge_regions(r, regions.at(groups.at(x).at(y))); + groups.at(x).at(y) = r.id; + // Grow to cover any chains + auto &chaine = chaines.at(x).at(y); + grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1); + }; + for (int x = r.x0; x < old_x0; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int x = old_x1 + 1; x <= x1; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int y = r.y0; y < old_y0; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + for (int y = old_y1 + 1; y <= r.y1; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + } + + void find_overused_regions() + { + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + // Either already in a group, or not overutilised. Ignore + if (groups.at(x).at(y) != -1) + continue; + bool overutilised = false; + for (size_t t = 0; t < buckets.size(); t++) { + if (occ_at(x, y, t) > bels_at(x, y, t)) { + overutilised = true; + break; + } + } + + if (!overutilised) + continue; + // log_info("%d %d %d\n", x, y, occ_at(x, y)); + int id = int(regions.size()); + groups.at(x).at(y) = id; + SpreaderRegion reg; + reg.id = id; + reg.x0 = reg.x1 = x; + reg.y0 = reg.y1 = y; + for (size_t t = 0; t < buckets.size(); t++) { + reg.bels.push_back(bels_at(x, y, t)); + reg.cells.push_back(occ_at(x, y, t)); + } + // Make sure we cover carries, etc + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); + + bool expanded = true; + while (expanded) { + expanded = false; + // Keep trying expansion in x and y, until we find no over-occupancy cells + // or hit grouped cells + + // First try expanding in x + if (reg.x1 < p->max_x) { + bool over_occ_x = false; + for (int y1 = reg.y0; y1 <= reg.y1; y1++) { + for (size_t t = 0; t < buckets.size(); t++) { + if (occ_at(reg.x1 + 1, y1, t) > bels_at(reg.x1 + 1, y1, t)) { + over_occ_x = true; + break; + } + } + } + if (over_occ_x) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + } + } + + if (reg.y1 < p->max_y) { + bool over_occ_y = false; + for (int x1 = reg.x0; x1 <= reg.x1; x1++) { + for (size_t t = 0; t < buckets.size(); t++) { + if (occ_at(x1, reg.y1 + 1, t) > bels_at(x1, reg.y1 + 1, t)) { + over_occ_y = true; + break; + } + } + } + if (over_occ_y) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + } + } + } + regions.push_back(reg); + } + } + + void expand_regions() + { + std::queue overu_regions; + float beta = p->cfg.beta; + for (auto &r : regions) { + if (!merged_regions.count(r.id) && r.overused(beta)) + overu_regions.push(r.id); + } + while (!overu_regions.empty()) { + int rid = overu_regions.front(); + overu_regions.pop(); + if (merged_regions.count(rid)) + continue; + auto ® = regions.at(rid); + while (reg.overused(beta)) { + bool changed = false; + for (int j = 0; j < p->cfg.spread_scale_x; j++) { + if (reg.x0 > 0) { + grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); + changed = true; + if (!reg.overused(beta)) + break; + } + if (reg.x1 < p->max_x) { + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + changed = true; + if (!reg.overused(beta)) + break; + } + } + for (int j = 0; j < p->cfg.spread_scale_y; j++) { + if (reg.y0 > 0) { + grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); + changed = true; + if (!reg.overused(beta)) + break; + } + if (reg.y1 < p->max_y) { + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + changed = true; + if (!reg.overused(beta)) + break; + } + } + if (!changed) { + for (auto bucket : buckets) { + if (reg.cells > reg.bels) { + IdString bucket_name = ctx->getBelBucketName(bucket); + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, + reg.x1, reg.y1, reg.cells.at(type_index.at(bucket)), bucket_name.c_str(ctx)); + } + } + break; + } + } + } + } + + // Implementation of the recursive cut-based spreading as described in the HeAP paper + // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir + + std::vector cut_cells; + + boost::optional> cut_region(SpreaderRegion &r, bool dir) + { + cut_cells.clear(); + auto &cal = cells_at_location; + int total_cells = 0, total_bels = 0; + for (int x = r.x0; x <= r.x1; x++) { + for (int y = r.y0; y <= r.y1; y++) { + std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + for (size_t t = 0; t < buckets.size(); t++) + total_bels += bels_at(x, y, t); + } + } + for (auto &cell : cut_cells) { + total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; + } + std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { + return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) + : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); + }); + + if (cut_cells.size() < 2) + return {}; + // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut + int pivot_cells = 0; + int pivot = 0; + for (auto &cell : cut_cells) { + pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; + if (pivot_cells >= total_cells / 2) + break; + pivot++; + } + if (pivot >= int(cut_cells.size())) { + pivot = int(cut_cells.size()) - 1; + } + // log_info("orig pivot %d/%d lc %d rc %d\n", pivot, int(cut_cells.size()), pivot_cells, total_cells - + // pivot_cells); + + // Find the clearance required either side of the pivot + int clearance_l = 0, clearance_r = 0; + for (size_t i = 0; i < cut_cells.size(); i++) { + int size; + if (cell_extents.count(cut_cells.at(i)->name)) { + auto &ce = cell_extents.at(cut_cells.at(i)->name); + size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1); + } else { + size = 1; + } + if (int(i) < pivot) + clearance_l = std::max(clearance_l, size); + else + clearance_r = std::max(clearance_r, size); + } + // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains + // still fit + + // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any + // bels of the appropriate type + int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1; + while (trimmed_l < (dir ? r.y1 : r.x1)) { + bool have_bels = false; + for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) { + for (size_t t = 0; t < buckets.size(); t++) { + if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i, t) > 0) { + have_bels = true; + break; + } + } + } + + if (have_bels) + break; + + trimmed_l++; + } + while (trimmed_r > (dir ? r.y0 : r.x0)) { + bool have_bels = false; + for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) { + for (size_t t = 0; t < buckets.size(); t++) { + if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i, t) > 0) { + have_bels = true; + break; + } + } + } + + if (have_bels) + break; + + trimmed_r--; + } + // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); + if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) + return {}; + // Now find the initial target cut that minimises utilisation imbalance, whilst + // meeting the clearance requirements for any large macros + std::vector left_cells_v(buckets.size(), 0), right_cells_v(buckets.size(), 0); + std::vector left_bels_v(buckets.size(), 0), right_bels_v(r.bels); + for (int i = 0; i <= pivot; i++) + left_cells_v.at(cell_index(*cut_cells.at(i))) += + p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; + for (int i = pivot + 1; i < int(cut_cells.size()); i++) + right_cells_v.at(cell_index(*cut_cells.at(i))) += + p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1; + + int best_tgt_cut = -1; + double best_deltaU = std::numeric_limits::max(); + // std::pair target_cut_bels; + std::vector slither_bels(buckets.size(), 0); + for (int i = trimmed_l; i <= trimmed_r; i++) { + for (size_t t = 0; t < buckets.size(); t++) + slither_bels.at(t) = 0; + for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { + for (size_t t = 0; t < buckets.size(); t++) + slither_bels.at(t) += dir ? bels_at(j, i, t) : bels_at(i, j, t); + } + for (size_t t = 0; t < buckets.size(); t++) { + left_bels_v.at(t) += slither_bels.at(t); + right_bels_v.at(t) -= slither_bels.at(t); + } + + if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { + // Solution is potentially valid + double aU = 0; + for (size_t t = 0; t < buckets.size(); t++) + aU += (left_cells_v.at(t) + right_cells_v.at(t)) * + std::abs(double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - + double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1))); + if (aU < best_deltaU) { + best_deltaU = aU; + best_tgt_cut = i; + } + } + } + if (best_tgt_cut == -1) + return {}; + // left_bels = target_cut_bels.first; + // right_bels = target_cut_bels.second; + for (size_t t = 0; t < buckets.size(); t++) { + left_bels_v.at(t) = 0; + right_bels_v.at(t) = 0; + } + for (int x = r.x0; x <= (dir ? r.x1 : best_tgt_cut); x++) + for (int y = r.y0; y <= (dir ? best_tgt_cut : r.y1); y++) { + for (size_t t = 0; t < buckets.size(); t++) { + left_bels_v.at(t) += bels_at(x, y, t); + } + } + for (int x = dir ? r.x0 : (best_tgt_cut + 1); x <= r.x1; x++) + for (int y = dir ? (best_tgt_cut + 1) : r.y0; y <= r.y1; y++) { + for (size_t t = 0; t < buckets.size(); t++) { + right_bels_v.at(t) += bels_at(x, y, t); + } + } + if (std::accumulate(left_bels_v.begin(), left_bels_v.end(), 0) == 0 || + std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0) == 0) + return {}; + + // Perturb the source cut to eliminate overutilisation + auto is_part_overutil = [&](bool r) { + double delta = 0; + for (size_t t = 0; t < left_cells_v.size(); t++) { + delta += double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) - + double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1)); + } + return r ? delta < 0 : delta > 0; + }; + while (pivot > 0 && is_part_overutil(false)) { + auto &move_cell = cut_cells.at(pivot); + int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; + left_cells_v.at(cell_index(*cut_cells.at(pivot))) -= size; + right_cells_v.at(cell_index(*cut_cells.at(pivot))) += size; + pivot--; + } + while (pivot < int(cut_cells.size()) - 1 && is_part_overutil(true)) { + auto &move_cell = cut_cells.at(pivot + 1); + int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; + left_cells_v.at(cell_index(*cut_cells.at(pivot))) += size; + right_cells_v.at(cell_index(*cut_cells.at(pivot))) -= size; + pivot++; + } + + // Split regions into bins, and then spread cells by linear interpolation within those bins + auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { + int N = cells_end - cells_start; + if (N <= 2) { + for (int i = cells_start; i < cells_end; i++) { + auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy + : p->cell_locs.at(cut_cells.at(i)->name).rawx; + pos = area_l + i * ((area_r - area_l) / N); + } + return; + } + // Split region into up to 10 (K) bins + int K = std::min(N, 10); + std::vector> bin_bounds; // [(cell start, area start)] + bin_bounds.emplace_back(cells_start, area_l); + for (int i = 1; i < K; i++) + bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K); + bin_bounds.emplace_back(cells_end, area_r + 0.99); + for (int i = 0; i < K; i++) { + auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); + double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy + : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; + double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy + : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; + double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left); + for (int j = bl.first; j < br.first; j++) { + Region *cr = cut_cells.at(j)->region; + if (cr != nullptr) { + // Limit spreading bounds to constraint region; if applicable + double brsc = p->limit_to_reg(cr, br.second, dir); + double blsc = p->limit_to_reg(cr, bl.second, dir); + double mr = (brsc - blsc) / std::max(0.00001, orig_right - orig_left); + auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy + : p->cell_locs.at(cut_cells.at(j)->name).rawx; + NPNR_ASSERT(pos >= orig_left && pos <= orig_right); + pos = blsc + mr * (pos - orig_left); + } else { + auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy + : p->cell_locs.at(cut_cells.at(j)->name).rawx; + NPNR_ASSERT(pos >= orig_left && pos <= orig_right); + pos = bl.second + m * (pos - orig_left); + } + } + } + }; + spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut); + spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r); + // Update various data structures + for (int x = r.x0; x <= r.x1; x++) + for (int y = r.y0; y <= r.y1; y++) { + cells_at_location.at(x).at(y).clear(); + } + for (auto cell : cut_cells) { + auto &cl = p->cell_locs.at(cell->name); + cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx))); + cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy))); + cells_at_location.at(cl.x).at(cl.y).push_back(cell); + } + SpreaderRegion rl, rr; + rl.id = int(regions.size()); + rl.x0 = r.x0; + rl.y0 = r.y0; + rl.x1 = dir ? r.x1 : best_tgt_cut; + rl.y1 = dir ? best_tgt_cut : r.y1; + rl.cells = left_cells_v; + rl.bels = left_bels_v; + rr.id = int(regions.size()) + 1; + rr.x0 = dir ? r.x0 : (best_tgt_cut + 1); + rr.y0 = dir ? (best_tgt_cut + 1) : r.y0; + rr.x1 = r.x1; + rr.y1 = r.y1; + rr.cells = right_cells_v; + rr.bels = right_bels_v; + regions.push_back(rl); + regions.push_back(rr); + for (int x = rl.x0; x <= rl.x1; x++) + for (int y = rl.y0; y <= rl.y1; y++) + groups.at(x).at(y) = rl.id; + for (int x = rr.x0; x <= rr.x1; x++) + for (int y = rr.y0; y <= rr.y1; y++) + groups.at(x).at(y) = rr.id; + return std::make_pair(rl.id, rr.id); + }; + }; + typedef decltype(CellInfo::udata) cell_udata_t; + cell_udata_t dont_solve = std::numeric_limits::max(); +}; +int HeAPPlacer::CutSpreader::seq = 0; + +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); } + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) +{ + alpha = ctx->setting("placerHeap/alpha"); + beta = ctx->setting("placerHeap/beta"); + criticalityExponent = ctx->setting("placerHeap/criticalityExponent"); + timingWeight = ctx->setting("placerHeap/timingWeight"); + parallelRefine = ctx->setting("placerHeap/parallelRefine", false); + + timing_driven = ctx->setting("timing_driven"); + solverTolerance = 1e-5; + placeAllAtOnce = false; + + hpwl_scale_x = 1; + hpwl_scale_y = 1; + spread_scale_x = 1; + spread_scale_y = 1; +} + +NEXTPNR_NAMESPACE_END + +#else + +#include "log.h" +#include "nextpnr.h" +#include "placer_heap.h" + +NEXTPNR_NAMESPACE_BEGIN +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) +{ + log_error("nextpnr was built without the HeAP placer\n"); + return false; +} + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) {} + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/place/placer_heap.h b/common/place/placer_heap.h new file mode 100644 index 00000000..9c62869e --- /dev/null +++ b/common/place/placer_heap.h @@ -0,0 +1,58 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2019 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * [[cite]] HeAP + * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + * [[cite]] SimPL + * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + */ + +#ifndef PLACER_HEAP_H +#define PLACER_HEAP_H +#include "log.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct PlacerHeapCfg +{ + PlacerHeapCfg(Context *ctx); + + float alpha, beta; + float criticalityExponent; + float timingWeight; + bool timing_driven; + float solverTolerance; + bool placeAllAtOnce; + bool parallelRefine; + + int hpwl_scale_x, hpwl_scale_y; + int spread_scale_x, spread_scale_y; + + // These cell types will be randomly locked to prevent singular matrices + pool ioBufTypes; + // These cell types are part of the same unit (e.g. slices split into + // components) so will always be spread together + std::vector> cellGroups; +}; + +extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg); +NEXTPNR_NAMESPACE_END +#endif diff --git a/common/place/timing_opt.cc b/common/place/timing_opt.cc new file mode 100644 index 00000000..f9246292 --- /dev/null +++ b/common/place/timing_opt.cc @@ -0,0 +1,561 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * Timing-optimised detailed placement algorithm using BFS of the neighbour graph created from cells + * on a critical path + * + * Based on "An Effective Timing-Driven Detailed Placement Algorithm for FPGAs" + * https://www.cerc.utexas.edu/utda/publications/C205.pdf + * + * Modifications made to deal with the smaller Bels that nextpnr uses instead of swapping whole tiles, + * and deal with the fact that not every cell on the crit path may be swappable. + */ + +#include "timing_opt.h" +#include +#include +#include "nextpnr.h" +#include "timing.h" +#include "util.h" + +NEXTPNR_NAMESPACE_BEGIN + +class TimingOptimiser +{ + public: + TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg), tmg(ctx){}; + bool optimise() + { + log_info("Running timing-driven placement optimisation...\n"); + ctx->lock(); + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); + tmg.setup(); + for (int i = 0; i < 30; i++) { + log_info(" Iteration %d...\n", i); + tmg.run(); + setup_delay_limits(); + auto crit_paths = find_crit_paths(0.98, 50000); + for (auto &path : crit_paths) + optimise_path(path); + if (ctx->verbose) + timing_analysis(ctx, false, true, false, false); + } + ctx->unlock(); + return true; + } + + private: + void setup_delay_limits() + { + max_net_delay.clear(); + for (auto &net : ctx->nets) { + NetInfo *ni = net.second.get(); + if (ni->driver.cell == nullptr) + continue; + for (auto usr : ni->users) { + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits::max(); + } + for (auto usr : ni->users) { + delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); + delay_t slack = tmg.get_setup_slack(CellPortKey(usr)); + delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr)); + if (slack == std::numeric_limits::max()) + continue; + max_net_delay[std::make_pair(usr.cell->name, usr.port)] = net_delay + ((slack - domain_slack) / 10); + } + } + } + + bool check_cell_delay_limits(CellInfo *cell) + { + for (const auto &port : cell->ports) { + int nc; + if (ctx->getPortTimingClass(cell, port.first, nc) == TMG_IGNORE) + continue; + NetInfo *net = port.second.net; + if (net == nullptr) + continue; + if (port.second.type == PORT_IN) { + if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) + continue; + for (auto user : net->users) { + if (user.cell == cell && user.port == port.first) { + if (ctx->predictArcDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(cell->name, port.first))) + return false; + } + } + + } else if (port.second.type == PORT_OUT) { + for (auto user : net->users) { + // This could get expensive for high-fanout nets?? + BelId dstBel = user.cell->bel; + if (dstBel == BelId()) + continue; + if (ctx->predictArcDelay(net, user) > + 1.1 * max_net_delay.at(std::make_pair(user.cell->name, user.port))) { + + return false; + } + } + } + } + return true; + } + + BelId cell_swap_bel(CellInfo *cell, BelId newBel) + { + BelId oldBel = cell->bel; + if (oldBel == newBel) + return oldBel; + CellInfo *other_cell = ctx->getBoundBelCell(newBel); + NPNR_ASSERT(other_cell == nullptr || other_cell->belStrength <= STRENGTH_WEAK); + ctx->unbindBel(oldBel); + if (other_cell != nullptr) { + ctx->unbindBel(newBel); + ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + } + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + return oldBel; + } + + // Check that a series of moves are both legal and remain within maximum delay bounds + // Moves are specified as a vector of pairs + bool acceptable_move(std::vector> &move, bool check_delays = true) + { + for (auto &entry : move) { + if (!ctx->isBelLocationValid(entry.first->bel)) + return false; + if (!ctx->isBelLocationValid(entry.second)) + return false; + if (!check_delays) + continue; + if (!check_cell_delay_limits(entry.first)) + return false; + // We might have swapped another cell onto the original bel. Check this for max delay violations + // too + CellInfo *swapped = ctx->getBoundBelCell(entry.second); + if (swapped != nullptr && !check_cell_delay_limits(swapped)) + return false; + } + return true; + } + + int find_neighbours(CellInfo *cell, IdString prev_cell, int d, bool allow_swap) + { + BelId curr = cell->bel; + Loc curr_loc = ctx->getBelLocation(curr); + int found_count = 0; + cell_neighbour_bels[cell->name] = pool{}; + for (int dy = -d; dy <= d; dy++) { + for (int dx = -d; dx <= d; dx++) { + // Go through all the Bels at this location + // First, find all bels of the correct type that are either unbound or bound normally + // Strongly bound bels are ignored + // FIXME: This means that we cannot touch carry chains or similar relatively constrained macros + std::vector free_bels_at_loc; + std::vector bound_bels_at_loc; + for (auto bel : ctx->getBelsByTile(curr_loc.x + dx, curr_loc.y + dy)) { + if (!ctx->isValidBelForCellType(cell->type, bel)) + continue; + CellInfo *bound = ctx->getBoundBelCell(bel); + if (bound == nullptr) { + free_bels_at_loc.push_back(bel); + } else if (bound->belStrength <= STRENGTH_WEAK && bound->cluster == ClusterId()) { + bound_bels_at_loc.push_back(bel); + } + } + BelId candidate; + + while (!free_bels_at_loc.empty() || !bound_bels_at_loc.empty()) { + BelId try_bel; + if (!free_bels_at_loc.empty()) { + int try_idx = ctx->rng(int(free_bels_at_loc.size())); + try_bel = free_bels_at_loc.at(try_idx); + free_bels_at_loc.erase(free_bels_at_loc.begin() + try_idx); + } else { + int try_idx = ctx->rng(int(bound_bels_at_loc.size())); + try_bel = bound_bels_at_loc.at(try_idx); + bound_bels_at_loc.erase(bound_bels_at_loc.begin() + try_idx); + } + if (bel_candidate_cells.count(try_bel) && !allow_swap) { + // Overlap is only allowed if it is with the previous cell (this is handled by removing those + // edges in the graph), or if allow_swap is true to deal with cases where overlap means few + // neighbours are identified + if (bel_candidate_cells.at(try_bel).size() > 1 || + (bel_candidate_cells.at(try_bel).size() == 1 && + *(bel_candidate_cells.at(try_bel).begin()) != prev_cell)) + continue; + } + // TODO: what else to check here? + candidate = try_bel; + break; + } + + if (candidate != BelId()) { + cell_neighbour_bels[cell->name].insert(candidate); + bel_candidate_cells[candidate].insert(cell->name); + // Work out if we need to delete any overlap + std::vector overlap; + for (auto other : bel_candidate_cells[candidate]) + if (other != cell->name && other != prev_cell) + overlap.push_back(other); + if (overlap.size() > 0) + NPNR_ASSERT(allow_swap); + for (auto ov : overlap) { + bel_candidate_cells[candidate].erase(ov); + cell_neighbour_bels[ov].erase(candidate); + } + } + } + } + return found_count; + } + + std::vector> find_crit_paths(float crit_thresh, size_t max_count) + { + std::vector> crit_paths; + std::vector>> crit_nets; + std::vector netnames; + std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), + [](const std::pair> &kv) { return kv.first; }); + ctx->sorted_shuffle(netnames); + for (auto net : netnames) { + if (crit_nets.size() >= max_count) + break; + float highest_crit = 0; + store_index crit_user_idx{}; + NetInfo *ni = ctx->nets.at(net).get(); + for (auto usr : ni->users.enumerate()) { + float crit = tmg.get_criticality(CellPortKey(usr.value)); + if (crit > highest_crit) { + highest_crit = crit; + crit_user_idx = usr.index; + } + } + if (highest_crit > crit_thresh) + crit_nets.emplace_back(ni, crit_user_idx); + } + + pool used_ports; + + for (auto crit_net : crit_nets) { + + if (used_ports.count(&(crit_net.first->users.at(crit_net.second)))) + continue; + + std::deque crit_path; + + // FIXME: This will fail badly on combinational loops + + // Iterate backwards following greatest criticality + NetInfo *back_cursor = crit_net.first; + while (back_cursor != nullptr) { + float max_crit = 0; + std::pair> crit_sink{nullptr, {}}; + CellInfo *cell = back_cursor->driver.cell; + if (cell == nullptr) + break; + for (auto port : cell->ports) { + if (port.second.type != PORT_IN) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + int ccount; + DelayQuad combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_INPUT) + continue; + bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); + if (!is_path) + continue; + float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first)); + if (used_ports.count(&(pn->users.at(port.second.user_idx)))) + continue; + if (usr_crit >= max_crit) { + max_crit = usr_crit; + crit_sink = std::make_pair(pn, port.second.user_idx); + } + } + + if (crit_sink.first != nullptr) { + crit_path.push_front(&(crit_sink.first->users.at(crit_sink.second))); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); + } + back_cursor = crit_sink.first; + } + // Iterate forwards following greatest criticiality + PortRef *fwd_cursor = &(crit_net.first->users.at(crit_net.second)); + while (fwd_cursor != nullptr) { + crit_path.push_back(fwd_cursor); + float max_crit = 0; + std::pair> crit_sink{nullptr, {}}; + CellInfo *cell = fwd_cursor->cell; + for (auto port : cell->ports) { + if (port.second.type != PORT_OUT) + continue; + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + + int ccount; + DelayQuad combDelay; + TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); + if (tpclass != TMG_COMB_OUTPUT && tpclass != TMG_REGISTER_OUTPUT) + continue; + bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); + if (!is_path) + continue; + for (auto usr : pn->users.enumerate()) { + if (used_ports.count(&(pn->users.at(usr.index)))) + continue; + float crit = tmg.get_criticality(CellPortKey(usr.value)); + if (crit >= max_crit) { + max_crit = crit; + crit_sink = std::make_pair(pn, usr.index); + } + } + } + if (crit_sink.first != nullptr) { + fwd_cursor = &(crit_sink.first->users.at(crit_sink.second)); + used_ports.insert(&(crit_sink.first->users.at(crit_sink.second))); + } else { + fwd_cursor = nullptr; + } + } + + std::vector crit_path_vec; + std::copy(crit_path.begin(), crit_path.end(), std::back_inserter(crit_path_vec)); + crit_paths.push_back(crit_path_vec); + } + + return crit_paths; + } + + void optimise_path(std::vector &path) + { + path_cells.clear(); + cell_neighbour_bels.clear(); + bel_candidate_cells.clear(); + if (ctx->debug) + log_info("Optimising the following path: \n"); + + auto front_port = path.front(); + NetInfo *front_net = front_port->cell->ports.at(front_port->port).net; + if (front_net != nullptr && front_net->driver.cell != nullptr) { + auto front_cell = front_net->driver.cell; + if (front_cell->belStrength <= STRENGTH_WEAK && cfg.cellTypes.count(front_cell->type) && + front_cell->cluster == ClusterId()) { + path_cells.push_back(front_cell->name); + } + } + + for (auto port : path) { + if (ctx->debug) { + float crit = tmg.get_criticality(CellPortKey(*port)); + log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), + ctx->nameOfBel(port->cell->bel), crit); + } + if (std::find(path_cells.begin(), path_cells.end(), port->cell->name) != path_cells.end()) + continue; + if (port->cell->belStrength > STRENGTH_WEAK || !cfg.cellTypes.count(port->cell->type) || + port->cell->cluster != ClusterId()) + continue; + if (ctx->debug) + log_info(" can move\n"); + path_cells.push_back(port->cell->name); + } + + if (path_cells.size() < 2) { + if (ctx->debug) { + log_info("Too few moveable cells; skipping path\n"); + log_break(); + } + + return; + } + + // Calculate original delay before touching anything + delay_t original_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + auto &port = path.at(i)->cell->ports.at(path.at(i)->port); + NetInfo *pn = port.net; + if (port.user_idx) + original_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx)); + } + + IdString last_cell; + const int d = 2; // FIXME: how to best determine d + for (auto cell : path_cells) { + // FIXME: when should we allow swapping due to a lack of candidates + find_neighbours(ctx->cells[cell].get(), last_cell, d, false); + last_cell = cell; + } + + if (ctx->debug) { + for (auto cell : path_cells) { + log_info("Candidate neighbours for %s (%s):\n", cell.c_str(ctx), ctx->nameOfBel(ctx->cells[cell]->bel)); + for (auto neigh : cell_neighbour_bels.at(cell)) { + log_info(" %s\n", ctx->nameOfBel(neigh)); + } + } + } + + // Actual BFS path optimisation algorithm + dict> cumul_costs; + dict, std::pair> backtrace; + std::queue> visit; + pool> to_visit; + + for (auto startbel : cell_neighbour_bels[path_cells.front()]) { + // Swap for legality check + CellInfo *cell = ctx->cells.at(path_cells.front()).get(); + BelId origBel = cell_swap_bel(cell, startbel); + std::vector> move{std::make_pair(cell, origBel)}; + if (acceptable_move(move)) { + auto entry = std::make_pair(0, startbel); + visit.push(entry); + cumul_costs[path_cells.front()][startbel] = 0; + } + // Swap back + cell_swap_bel(cell, origBel); + } + + while (!visit.empty()) { + auto entry = visit.front(); + visit.pop(); + auto cellname = path_cells.at(entry.first); + if (entry.first == int(path_cells.size()) - 1) + continue; + std::vector> move; + // Apply the entire backtrace for accurate legality and delay checks + // This is probably pretty expensive (but also probably pales in comparison to the number of swaps + // SA will make...) + std::vector> route_to_entry; + auto cursor = std::make_pair(cellname, entry.second); + route_to_entry.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_entry.push_back(cursor); + } + for (auto rt_entry : boost::adaptors::reverse(route_to_entry)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + BelId origBel = cell_swap_bel(cell, rt_entry.second); + move.push_back(std::make_pair(cell, origBel)); + } + + // Have a look at where we can travel from here + for (auto neighbour : cell_neighbour_bels.at(path_cells.at(entry.first + 1))) { + // Edges between overlapping bels are deleted + if (neighbour == entry.second) + continue; + // Experimentally swap the next path cell onto the neighbour bel we are trying + IdString ncname = path_cells.at(entry.first + 1); + CellInfo *next_cell = ctx->cells.at(ncname).get(); + BelId origBel = cell_swap_bel(next_cell, neighbour); + move.push_back(std::make_pair(next_cell, origBel)); + + delay_t total_delay = 0; + + for (size_t i = 0; i < path.size(); i++) { + auto &port = path.at(i)->cell->ports.at(path.at(i)->port); + NetInfo *pn = port.net; + if (port.user_idx) + total_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx)); + if (path.at(i)->cell == next_cell) + break; + } + + // First, check if the move is actually worthwhile from a delay point of view before the expensive + // legality check + if (!cumul_costs.count(ncname) || !cumul_costs.at(ncname).count(neighbour) || + cumul_costs.at(ncname).at(neighbour) > total_delay) { + // Now check that the swaps we have made to get here are legal and meet max delay requirements + if (acceptable_move(move)) { + cumul_costs[ncname][neighbour] = total_delay; + backtrace[std::make_pair(ncname, neighbour)] = std::make_pair(cellname, entry.second); + if (!to_visit.count(std::make_pair(entry.first + 1, neighbour))) + visit.push(std::make_pair(entry.first + 1, neighbour)); + } + } + // Revert the experimental swap + cell_swap_bel(move.back().first, move.back().second); + move.pop_back(); + } + + // Revert move by swapping cells back to their original order + // Execute swaps in reverse order to how we made them originally + for (auto move_entry : boost::adaptors::reverse(move)) { + cell_swap_bel(move_entry.first, move_entry.second); + } + } + + // Did we find a solution?? + if (cumul_costs.count(path_cells.back())) { + // Find the end position with the lowest total delay + auto &end_options = cumul_costs.at(path_cells.back()); + auto lowest = std::min_element(end_options.begin(), end_options.end(), + [](const std::pair &a, const std::pair &b) { + return a.second < b.second; + }); + NPNR_ASSERT(lowest != end_options.end()); + + std::vector> route_to_solution; + auto cursor = std::make_pair(path_cells.back(), lowest->first); + route_to_solution.push_back(cursor); + while (backtrace.count(cursor)) { + cursor = backtrace.at(cursor); + route_to_solution.push_back(cursor); + } + if (ctx->debug) + log_info("Found a solution with cost %.02f ns (existing path %.02f ns)\n", + ctx->getDelayNS(lowest->second), ctx->getDelayNS(original_delay)); + for (auto rt_entry : boost::adaptors::reverse(route_to_solution)) { + CellInfo *cell = ctx->cells.at(rt_entry.first).get(); + cell_swap_bel(cell, rt_entry.second); + if (ctx->debug) + log_info(" %s at %s\n", rt_entry.first.c_str(ctx), ctx->nameOfBel(rt_entry.second)); + } + + } else { + if (ctx->debug) + log_info("Solution was not found\n"); + } + if (ctx->debug) + log_break(); + } + + // Current candidate Bels for cells (linked in both direction> + std::vector path_cells; + dict> cell_neighbour_bels; + dict> bel_candidate_cells; + // Map cell ports to net delay limit + dict, delay_t> max_net_delay; + Context *ctx; + TimingOptCfg cfg; + TimingAnalyser tmg; +}; + +bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } + +NEXTPNR_NAMESPACE_END diff --git a/common/place/timing_opt.h b/common/place/timing_opt.h new file mode 100644 index 00000000..8f8bc709 --- /dev/null +++ b/common/place/timing_opt.h @@ -0,0 +1,37 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2018 gatecat + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "log.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct TimingOptCfg +{ + TimingOptCfg(Context *ctx) {} + + // The timing optimiser will *only* optimise cells of these types + // Normally these would only be logic cells (or tiles if applicable), the algorithm makes little sense + // for other cell types + pool cellTypes; +}; + +extern bool timing_opt(Context *ctx, TimingOptCfg cfg); + +NEXTPNR_NAMESPACE_END -- cgit v1.2.3