aboutsummaryrefslogtreecommitdiffstats
path: root/common
diff options
context:
space:
mode:
authorgatecat <gatecat@ds0.me>2022-04-13 19:18:13 +0100
committergatecat <gatecat@ds0.me>2022-04-17 20:10:49 +0100
commit61b3e2e1ffbf0c0438c734a04d9d86d75668677e (patch)
tree4a03c9c9900f5f266bc0f199fea842b034a349b0 /common
parent895aa01e391445ad8bdcfc40d091cda2f783cbb1 (diff)
downloadnextpnr-61b3e2e1ffbf0c0438c734a04d9d86d75668677e.tar.gz
nextpnr-61b3e2e1ffbf0c0438c734a04d9d86d75668677e.tar.bz2
nextpnr-61b3e2e1ffbf0c0438c734a04d9d86d75668677e.zip
Move general parallel detail place code out of parallel_refine
Signed-off-by: gatecat <gatecat@ds0.me>
Diffstat (limited to 'common')
-rw-r--r--common/place/detail_place_cfg.h36
-rw-r--r--common/place/detail_place_core.cc457
-rw-r--r--common/place/detail_place_core.h224
-rw-r--r--common/place/parallel_refine.cc552
-rw-r--r--common/place/parallel_refine.h6
5 files changed, 730 insertions, 545 deletions
diff --git a/common/place/detail_place_cfg.h b/common/place/detail_place_cfg.h
new file mode 100644
index 00000000..dadda0e8
--- /dev/null
+++ b/common/place/detail_place_cfg.h
@@ -0,0 +1,36 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef DETAIL_PLACE_CFG_H
+#define DETAIL_PLACE_CFG_H
+
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct DetailPlaceCfg
+{
+ DetailPlaceCfg(Context *ctx);
+ bool timing_driven;
+ int hpwl_scale_x, hpwl_scale_y;
+ float crit_exp = 8;
+};
+
+NEXTPNR_NAMESPACE_END
+#endif
diff --git a/common/place/detail_place_core.cc b/common/place/detail_place_core.cc
new file mode 100644
index 00000000..bb383639
--- /dev/null
+++ b/common/place/detail_place_core.cc
@@ -0,0 +1,457 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "detail_place_core.h"
+#include "nextpnr.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+DetailPlaceCfg::DetailPlaceCfg(Context *ctx)
+{
+ timing_driven = ctx->setting<bool>("timing_driven");
+
+ hpwl_scale_x = 1;
+ hpwl_scale_y = 1;
+}
+
+PlacePartition::PlacePartition(Context *ctx)
+{
+ x0 = ctx->getGridDimX();
+ y0 = ctx->getGridDimY();
+ x1 = 0;
+ y1 = 0;
+ for (auto &cell : ctx->cells) {
+ Loc l = ctx->getBelLocation(cell.second->bel);
+ x0 = std::min(x0, l.x);
+ x1 = std::max(x1, l.x);
+ y0 = std::min(y0, l.y);
+ y1 = std::max(y1, l.y);
+ cells.push_back(cell.second.get());
+ }
+}
+
+void PlacePartition::split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r)
+{
+ std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) {
+ Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel);
+ return yaxis ? (l0.y < l1.y) : (l0.x < l1.x);
+ });
+ size_t pivot_point = size_t(cells.size() * pivot);
+ l.cells.clear();
+ r.cells.clear();
+ l.cells.reserve(pivot_point);
+ r.cells.reserve(cells.size() - pivot_point);
+ int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1)
+ : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y
+ : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x);
+ for (size_t i = 0; i < cells.size(); i++) {
+ Loc loc = ctx->getBelLocation(cells.at(i)->bel);
+ ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i));
+ }
+ if (yaxis) {
+ l.x0 = r.x0 = x0;
+ l.x1 = r.x1 = x1;
+ l.y0 = y0;
+ l.y1 = pivot_coord;
+ r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1);
+ r.y1 = y1;
+ } else {
+ l.y0 = r.y0 = y0;
+ l.y1 = r.y1 = y1;
+ l.x0 = x0;
+ l.x1 = pivot_coord;
+ r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1);
+ r.x1 = x1;
+ }
+}
+
+void DetailPlacerState::update_global_costs()
+{
+ last_bounds.resize(flat_nets.size());
+ last_tmg_costs.resize(flat_nets.size());
+ total_wirelen = 0;
+ total_timing_cost = 0;
+ for (size_t i = 0; i < flat_nets.size(); i++) {
+ NetInfo *ni = flat_nets.at(i);
+ if (skip_net(ni))
+ continue;
+ last_bounds.at(i) = NetBB::compute(ctx, ni);
+ total_wirelen += last_bounds.at(i).hpwl(base_cfg);
+ if (!timing_skip_net(ni)) {
+ auto &tc = last_tmg_costs.at(i);
+ tc.resize(ni->users.capacity());
+ for (auto usr : ni->users.enumerate()) {
+ tc.at(usr.index.idx()) = get_timing_cost(ni, usr.index);
+ total_timing_cost += tc.at(usr.index.idx());
+ }
+ }
+ }
+}
+
+NetBB NetBB::compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel)
+{
+ NetBB result{};
+ if (!net->driver.cell)
+ return result;
+ auto bel_loc = [&](const CellInfo *cell) {
+ BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel;
+ return ctx->getBelLocation(bel);
+ };
+ result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1;
+ Loc drv_loc = bel_loc(net->driver.cell);
+ result.x0 = result.x1 = drv_loc.x;
+ result.y0 = result.y1 = drv_loc.y;
+ for (auto &usr : net->users) {
+ Loc l = bel_loc(usr.cell);
+ if (l.x == result.x0)
+ ++result.nx0; // on the edge
+ else if (l.x < result.x0) {
+ result.x0 = l.x; // extends the edge
+ result.nx0 = 1;
+ }
+ if (l.x == result.x1)
+ ++result.nx1; // on the edge
+ else if (l.x > result.x1) {
+ result.x1 = l.x; // extends the edge
+ result.nx1 = 1;
+ }
+ if (l.y == result.y0)
+ ++result.ny0; // on the edge
+ else if (l.y < result.y0) {
+ result.y0 = l.y; // extends the edge
+ result.ny0 = 1;
+ }
+ if (l.y == result.y1)
+ ++result.ny1; // on the edge
+ else if (l.y > result.y1) {
+ result.y1 = l.y; // extends the edge
+ result.ny1 = 1;
+ }
+ }
+ return result;
+}
+
+void DetailPlacerThreadState::set_partition(const PlacePartition &part)
+{
+ p = part;
+ thread_nets.clear();
+ thread_net_idx.resize(g.flat_nets.size());
+ std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1);
+ // Determine the set of nets that are within the thread; and therefore we care about
+ for (auto thread_cell : part.cells) {
+ for (auto &port : thread_cell->ports) {
+ if (!port.second.net)
+ continue;
+ int global_idx = port.second.net->udata;
+ auto &thread_idx = thread_net_idx.at(global_idx);
+ // Already added to the set
+ if (thread_idx != -1)
+ continue;
+ thread_idx = thread_nets.size();
+ thread_nets.push_back(port.second.net);
+ }
+ }
+ tmg_ignored_nets.clear();
+ ignored_nets.clear();
+ for (auto tn : thread_nets) {
+ ignored_nets.push_back(g.skip_net(tn));
+ tmg_ignored_nets.push_back(g.timing_skip_net(tn));
+ }
+ // Set up the original cell-bel map for all nets inside the thread
+ local_cell2bel.clear();
+ for (NetInfo *net : thread_nets) {
+ if (net->driver.cell)
+ local_cell2bel[net->driver.cell->name] = net->driver.cell->bel;
+ for (auto &usr : net->users)
+ local_cell2bel[usr.cell->name] = usr.cell->bel;
+ }
+}
+
+void DetailPlacerThreadState::setup_initial_state()
+{
+ // Setup initial net bounding boxes and timing costs
+ net_bounds.clear();
+ arc_tmg_cost.clear();
+ for (auto tn : thread_nets) {
+ net_bounds.push_back(g.last_bounds.at(tn->udata));
+ arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata));
+ }
+ new_net_bounds = net_bounds;
+ for (int j = 0; j < 2; j++) {
+ auto &a = axes.at(j);
+ a.already_bounds_changed.resize(net_bounds.size());
+ }
+ already_timing_changed.clear();
+ already_timing_changed.resize(net_bounds.size());
+ for (size_t i = 0; i < thread_nets.size(); i++)
+ already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity());
+}
+
+bool DetailPlacerThreadState::bounds_check(BelId bel)
+{
+ Loc l = ctx->getBelLocation(bel);
+ if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1)
+ return false;
+ return true;
+}
+
+bool DetailPlacerThreadState::bind_move()
+{
+#if !defined(__wasm)
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ for (auto &entry : moved_cells) {
+ ctx->unbindBel(entry.second.first);
+ }
+ bool success = true;
+ for (auto &entry : moved_cells) {
+ // Make sure targets are available before we bind them
+ if (!ctx->checkBelAvail(entry.second.second)) {
+ success = false;
+ break;
+ }
+ ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = true;
+ return success;
+}
+
+bool DetailPlacerThreadState::check_validity()
+{
+#if !defined(__wasm)
+ std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ bool result = true;
+ for (auto e : moved_cells) {
+ if (!ctx->isBelLocationValid(e.second.first)) {
+ // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer
+ // enabling dedicated routes to be used
+ result = false;
+ break;
+ }
+ if (!ctx->isBelLocationValid(e.second.second)) {
+ result = false;
+ break;
+ }
+ }
+ return result;
+}
+
+void DetailPlacerThreadState::revert_move()
+{
+ if (arch_state_dirty) {
+ // If changes to the arch state were made, revert them by restoring original cell bindings
+#if !defined(__wasm)
+ std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
+#endif
+ for (auto &entry : moved_cells) {
+ BelId curr_bound = ctx->cells.at(entry.first)->bel;
+ if (curr_bound != BelId())
+ ctx->unbindBel(curr_bound);
+ }
+ for (auto &entry : moved_cells) {
+ ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
+ }
+ arch_state_dirty = false;
+ }
+ for (auto &entry : moved_cells)
+ local_cell2bel[entry.first] = entry.second.first;
+}
+
+void DetailPlacerThreadState::commit_move()
+{
+ arch_state_dirty = false;
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ // Commit updated net bounds
+ net_bounds.at(bc) = new_net_bounds.at(bc);
+ }
+ }
+ if (g.base_cfg.timing_driven) {
+ NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size());
+ for (size_t i = 0; i < timing_changed_arcs.size(); i++) {
+ auto arc = timing_changed_arcs.at(i);
+ arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i);
+ }
+ }
+}
+
+void DetailPlacerThreadState::compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel)
+{
+ Loc new_loc = ctx->getBelLocation(new_bel);
+ Loc old_loc = ctx->getBelLocation(old_bel);
+ for (const auto &port : cell->ports) {
+ NetInfo *pn = port.second.net;
+ if (!pn)
+ continue;
+ int idx = thread_net_idx.at(pn->udata);
+ if (ignored_nets.at(idx))
+ continue;
+ NetBB &new_bounds = new_net_bounds.at(idx);
+ // For the x-axis (i=0) and y-axis (i=1)
+ for (int i = 0; i < 2; i++) {
+ auto &axis = axes.at(i);
+ // New and old on this axis
+ int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x;
+ // References to updated bounding box entries
+ auto &b0 = i ? new_bounds.y0 : new_bounds.x0;
+ auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0;
+ auto &b1 = i ? new_bounds.y1 : new_bounds.x1;
+ auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1;
+ auto &change = axis.already_bounds_changed.at(idx);
+ // Lower bound
+ if (new_pos < b0) {
+ // Further out than current lower bound
+ b0 = new_pos;
+ n0 = 1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b0 && old_pos > b0) {
+ // Moved from inside into current bound
+ ++n0;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b0 && new_pos > b0) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n0 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n0;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ // Upper bound
+ if (new_pos > b1) {
+ // Further out than current upper bound
+ b1 = new_pos;
+ n1 = 1; // this cell is now the sole cell on the new bound (mirrors n0 = 1 above)
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (new_pos == b1 && old_pos < b1) {
+ // Moved onto current bound
+ ++n1;
+ if (change == NO_CHANGE) {
+ change = CELL_MOVED_OUTWARDS;
+ axis.bounds_changed_nets.push_back(idx);
+ }
+ } else if (old_pos == b1 && new_pos < b1) {
+ // Moved from current bound to inside
+ if (change == NO_CHANGE)
+ axis.bounds_changed_nets.push_back(idx);
+ if (n1 == 1) {
+ // Was the last cell on the bound; have to do a full recompute
+ change = FULL_RECOMPUTE;
+ } else {
+ --n1;
+ if (change == NO_CHANGE)
+ change = CELL_MOVED_INWARDS;
+ }
+ }
+ }
+ // Timing updates if timing driven
+ if (g.base_cfg.timing_driven && !tmg_ignored_nets.at(idx)) {
+ if (port.second.type == PORT_OUT) {
+ int cc;
+ TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
+ if (cls != TMG_IGNORE) {
+ for (auto usr : pn->users.enumerate())
+ if (!already_timing_changed.at(idx).at(usr.index.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index));
+ already_timing_changed.at(idx).at(usr.index.idx()) = true;
+ }
+ }
+ } else {
+ auto usr = port.second.user_idx;
+ if (!already_timing_changed.at(idx).at(usr.idx())) {
+ timing_changed_arcs.emplace_back(std::make_pair(idx, usr));
+ already_timing_changed.at(idx).at(usr.idx()) = true;
+ }
+ }
+ }
+ }
+}
+
+void DetailPlacerThreadState::compute_total_change()
+{
+ auto &xa = axes.at(0), &ya = axes.at(1);
+ for (auto &bc : xa.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
+ new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
+ for (auto &bc : xa.bounds_changed_nets)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg));
+ for (auto &bc : ya.bounds_changed_nets)
+ if (xa.already_bounds_changed.at(bc) == NO_CHANGE)
+ wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg));
+ if (g.base_cfg.timing_driven) {
+ NPNR_ASSERT(new_timing_costs.empty());
+ for (auto arc : timing_changed_arcs) {
+ double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel);
+ timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx()));
+ new_timing_costs.push_back(new_cost);
+ }
+ }
+}
+
+void DetailPlacerThreadState::reset_move_state()
+{
+ moved_cells.clear();
+ cell_rel.clear();
+ for (auto &axis : axes) {
+ for (auto bc : axis.bounds_changed_nets) {
+ new_net_bounds.at(bc) = net_bounds.at(bc);
+ axis.already_bounds_changed[bc] = NO_CHANGE;
+ }
+ axis.bounds_changed_nets.clear();
+ }
+ for (auto &arc : timing_changed_arcs) {
+ already_timing_changed.at(arc.first).at(arc.second.idx()) = false;
+ }
+ timing_changed_arcs.clear();
+ new_timing_costs.clear();
+ wirelen_delta = 0;
+ timing_delta = 0;
+}
+
+bool DetailPlacerThreadState::add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel)
+{
+ if (!bounds_check(old_bel) || !bounds_check(new_bel))
+ return false;
+ if (!ctx->isValidBelForCellType(cell->type, new_bel))
+ return false;
+ NPNR_ASSERT(!moved_cells.count(cell->name));
+ moved_cells[cell->name] = std::make_pair(old_bel, new_bel);
+ local_cell2bel[cell->name] = new_bel;
+ compute_changes_for_cell(cell, old_bel, new_bel);
+ return true;
+}
+
+NEXTPNR_NAMESPACE_END
diff --git a/common/place/detail_place_core.h b/common/place/detail_place_core.h
new file mode 100644
index 00000000..1c280f24
--- /dev/null
+++ b/common/place/detail_place_core.h
@@ -0,0 +1,224 @@
+/*
+ * nextpnr -- Next Generation Place and Route
+ *
+ * Copyright (C) 2021-22 gatecat <gatecat@ds0.me>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+This provides core data structures for a thread-safe detail placer that needs to swap cells and evaluate the cost
+changes of swaps.
+
+It works on a partition-based threading approach; although threading can be avoided by only instantiating one
+per-thread structure and calling its methods from the main thread.
+
+Each thread's data includes its own local net indexing for nets inside the partition (which can overlap thread
+boundaries); and its own local cell-to-bel mapping for any cells on those nets, so there are no races with moves
+made by other threads.
+
+A move is an atomic transaction of updated cell to bel mappings inside a thread. The first step is to reset the
+per-move structures; then to add all of the moved cells to the move with add_to_move.
+
+Evaluation of wirelength and timing changes of a move is done with compute_changes_for_cell and compute_total_change.
+
+bind_move will probationally bind the move using the arch API functions, acquiring a lock during this time to prevent
+races on non-thread-safe arch implementations, returning true if the bind succeeded or false if something went wrong
+and it should be aborted. check_validity must then be called to use the arch API validity check functions on the move.
+
+Finally if the move meets criteria and is accepted then commit_move marks it as committed, otherwise revert_move
+aborts the entire move transaction.
+*/
+
+#ifndef DETAIL_PLACE_CORE_H
+#define DETAIL_PLACE_CORE_H
+
+#include "nextpnr.h"
+
+#include "detail_place_cfg.h"
+#include "fast_bels.h"
+#include "timing.h"
+
+#include <queue>
+
+#if !defined(__wasm)
+#include <shared_mutex>
+#endif
+
+NEXTPNR_NAMESPACE_BEGIN
+
+struct PlacePartition
+{
+ int x0, y0, x1, y1;
+ std::vector<CellInfo *> cells;
+ PlacePartition() = default;
+ explicit PlacePartition(Context *ctx);
+ void split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r);
+};
+
+typedef int64_t wirelen_t;
+
+struct NetBB
+{
+ // Actual bounding box
+ int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
+ // Number of cells at each extremity
+ int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
+ inline wirelen_t hpwl(const DetailPlaceCfg &cfg) const
+ {
+ return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
+ }
+ static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr);
+};
+
+struct DetailPlacerState
+{
+ explicit DetailPlacerState(Context *ctx, DetailPlaceCfg &cfg)
+ : ctx(ctx), base_cfg(cfg), bels(ctx, false, 64), tmg(ctx){};
+ Context *ctx;
+ DetailPlaceCfg &base_cfg;
+ FastBels bels;
+ std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index
+ std::vector<NetBB> last_bounds;
+ std::vector<std::vector<double>> last_tmg_costs;
+ dict<IdString, NetBB> region_bounds;
+ TimingAnalyser tmg;
+
+ wirelen_t total_wirelen = 0;
+ double total_timing_cost = 0;
+
+#if !defined(__wasm)
+ std::shared_timed_mutex archapi_mutex;
+#endif
+
+ inline double get_timing_cost(const NetInfo *net, store_index<PortRef> user,
+ const dict<IdString, BelId> *cell2bel = nullptr)
+ {
+ if (!net->driver.cell)
+ return 0;
+ const auto &sink = net->users.at(user);
+ IdString driver_pin, sink_pin;
+ // Pick the first pin for a prediction; assume all will be similar enough
+ for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) {
+ driver_pin = pin;
+ break;
+ }
+ for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) {
+ sink_pin = pin;
+ break;
+ }
+ float crit = tmg.get_criticality(CellPortKey(sink));
+ BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel;
+ BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel;
+ double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin));
+ return delay * std::pow(crit, base_cfg.crit_exp);
+ }
+
+ inline bool skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ if (ctx->getBelGlobalBuf(net->driver.cell->bel))
+ return true;
+ return false;
+ }
+
+ inline bool timing_skip_net(const NetInfo *net) const
+ {
+ if (!net->driver.cell)
+ return true;
+ int cc;
+ auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc);
+ if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK)
+ return true;
+ return false;
+ }
+
+ void update_global_costs();
+};
+
+struct DetailPlacerThreadState
+{
+ Context *ctx; // Nextpnr context pointer
+ DetailPlacerState &g; // Placer engine state
+ int idx; // Index of the thread
+ DeterministicRNG rng; // Local RNG
+ // The cell partition that the thread works on
+ PlacePartition p;
+ // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can
+ // optimise
+ std::vector<int> thread_net_idx;
+ // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's
+ // perspective
+ std::vector<NetInfo *> thread_nets;
+ std::vector<NetBB> net_bounds;
+ std::vector<std::vector<double>> arc_tmg_cost;
+ std::vector<bool> ignored_nets, tmg_ignored_nets;
+ bool arch_state_dirty = false;
+ // Our local cell-bel map; that won't be affected by out-of-partition moves
+ dict<IdString, BelId> local_cell2bel;
+
+ // Data on an inflight move
+ dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new)
+ // For cluster moves only
+ std::vector<std::pair<CellInfo *, Loc>> cell_rel;
+ // For incremental wirelength and delay updates
+ wirelen_t wirelen_delta = 0;
+ double timing_delta = 0;
+ // Wirelength-related changes are handled on a per-axis basis to reduce redundant work
+ enum BoundChange
+ {
+ NO_CHANGE,
+ CELL_MOVED_INWARDS,
+ CELL_MOVED_OUTWARDS,
+ FULL_RECOMPUTE
+ };
+ struct AxisChanges
+ {
+ std::vector<int> bounds_changed_nets;
+ std::vector<BoundChange> already_bounds_changed;
+ };
+ std::array<AxisChanges, 2> axes;
+ std::vector<NetBB> new_net_bounds;
+
+ std::vector<std::vector<bool>> already_timing_changed;
+ std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs;
+ std::vector<double> new_timing_costs;
+
+ DetailPlacerThreadState(Context *ctx, DetailPlacerState &g, int idx) : ctx(ctx), g(g), idx(idx){};
+ void set_partition(const PlacePartition &part);
+ void setup_initial_state();
+ bool bounds_check(BelId bel);
+
+ // Reset the inflight move state
+ void reset_move_state();
+ // Add a cell change to the move
+ bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel);
+ // For an inflight move; attempt to actually apply the changes to the arch API
+ bool bind_move();
+ // Checks if the arch API bel validity for a move is accepted
+ bool check_validity();
+ // Undo any changes relating to an inflight move
+ void revert_move();
+ // Mark the inflight move as complete and update cost structures
+ void commit_move();
+ // Update the inflight cost change structures for a given cell move
+ void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel);
+ // Update the total cost change for an inflight move
+ void compute_total_change();
+};
+
+NEXTPNR_NAMESPACE_END
+
+#endif
diff --git a/common/place/parallel_refine.cc b/common/place/parallel_refine.cc
index a868ca58..a2de5b13 100644
--- a/common/place/parallel_refine.cc
+++ b/common/place/parallel_refine.cc
@@ -22,9 +22,8 @@
#if !defined(__wasm)
-#include "fast_bels.h"
+#include "detail_place_core.h"
#include "scope_lock.h"
-#include "timing.h"
#include <chrono>
#include <mutex>
@@ -35,515 +34,24 @@
NEXTPNR_NAMESPACE_BEGIN
namespace {
-struct Partition
-{
- int x0, y0, x1, y1;
- std::vector<CellInfo *> cells;
- Partition() = default;
- explicit Partition(Context *ctx)
- {
- x0 = ctx->getGridDimX();
- y0 = ctx->getGridDimY();
- x1 = 0;
- y1 = 0;
- for (auto &cell : ctx->cells) {
- Loc l = ctx->getBelLocation(cell.second->bel);
- x0 = std::min(x0, l.x);
- x1 = std::max(x1, l.x);
- y0 = std::min(y0, l.y);
- y1 = std::max(y1, l.y);
- cells.push_back(cell.second.get());
- }
- }
- void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r)
- {
- std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) {
- Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel);
- return yaxis ? (l0.y < l1.y) : (l0.x < l1.x);
- });
- size_t pivot_point = size_t(cells.size() * pivot);
- l.cells.clear();
- r.cells.clear();
- l.cells.reserve(pivot_point);
- r.cells.reserve(cells.size() - pivot_point);
- int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1)
- : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y
- : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x);
- for (size_t i = 0; i < cells.size(); i++) {
- Loc loc = ctx->getBelLocation(cells.at(i)->bel);
- ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i));
- }
- if (yaxis) {
- l.x0 = r.x0 = x0;
- l.x1 = r.x1 = x1;
- l.y0 = y0;
- l.y1 = pivot_coord;
- r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1);
- r.y1 = y1;
- } else {
- l.y0 = r.y0 = y0;
- l.y1 = r.y1 = y1;
- l.x0 = x0;
- l.x1 = pivot_coord;
- r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1);
- r.x1 = x1;
- }
- }
-};
-typedef int64_t wirelen_t;
-
-struct NetBB
+struct GlobalState : DetailPlacerState
{
- // Actual bounding box
- int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
- // Number of cells at each extremity
- int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0;
- wirelen_t hpwl(const ParallelRefineCfg &cfg) const
- {
- return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0));
- }
- static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr)
- {
- NetBB result{};
- if (!net->driver.cell)
- return result;
- auto bel_loc = [&](const CellInfo *cell) {
- BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel;
- return ctx->getBelLocation(bel);
- };
- result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1;
- Loc drv_loc = bel_loc(net->driver.cell);
- result.x0 = result.x1 = drv_loc.x;
- result.y0 = result.y1 = drv_loc.y;
- for (auto &usr : net->users) {
- Loc l = bel_loc(usr.cell);
- if (l.x == result.x0)
- ++result.nx0; // on the edge
- else if (l.x < result.x0) {
- result.x0 = l.x; // extends the edge
- result.nx0 = 1;
- }
- if (l.x == result.x1)
- ++result.nx1; // on the edge
- else if (l.x > result.x1) {
- result.x1 = l.x; // extends the edge
- result.nx1 = 1;
- }
- if (l.y == result.y0)
- ++result.ny0; // on the edge
- else if (l.y < result.y0) {
- result.y0 = l.y; // extends the edge
- result.ny0 = 1;
- }
- if (l.y == result.y1)
- ++result.ny1; // on the edge
- else if (l.y > result.y1) {
- result.y1 = l.y; // extends the edge
- result.ny1 = 1;
- }
- }
- return result;
- }
-};
+ explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : DetailPlacerState(ctx, this->cfg), cfg(cfg){};
-struct GlobalState
-{
- explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){};
- Context *ctx;
ParallelRefineCfg cfg;
- FastBels bels;
- std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index
- std::vector<NetBB> last_bounds;
- std::vector<std::vector<double>> last_tmg_costs;
- dict<IdString, NetBB> region_bounds;
- TimingAnalyser tmg;
-
- std::shared_timed_mutex archapi_mutex;
-
double temperature = 1e-7;
int radius = 3;
-
- wirelen_t total_wirelen = 0;
- double total_timing_cost = 0;
-
- double get_timing_cost(const NetInfo *net, store_index<PortRef> user,
- const dict<IdString, BelId> *cell2bel = nullptr)
- {
- if (!net->driver.cell)
- return 0;
- const auto &sink = net->users.at(user);
- IdString driver_pin, sink_pin;
- // Pick the first pin for a prediction; assume all will be similar enouhg
- for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) {
- driver_pin = pin;
- break;
- }
- for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) {
- sink_pin = pin;
- break;
- }
- float crit = tmg.get_criticality(CellPortKey(sink));
- BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel;
- BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel;
- double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin));
- return delay * std::pow(crit, cfg.crit_exp);
- }
-
- bool skip_net(const NetInfo *net) const
- {
- if (!net->driver.cell)
- return true;
- if (ctx->getBelGlobalBuf(net->driver.cell->bel))
- return true;
- return false;
- }
- bool timing_skip_net(const NetInfo *net) const
- {
- if (!net->driver.cell)
- return true;
- int cc;
- auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc);
- if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK)
- return true;
- return false;
- }
// ....
};
-struct ThreadState
+struct ThreadState : DetailPlacerThreadState
{
- Context *ctx; // Nextpnr context pointer
- GlobalState &g; // Refinement engine state
- int idx; // Index of the thread
- DeterministicRNG rng; // Local RNG
- // The cell partition that the thread works on
- Partition p;
- // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can
- // optimise
- std::vector<int> thread_net_idx;
- // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's
- // perspective
- std::vector<NetInfo *> thread_nets;
- std::vector<NetBB> net_bounds;
- std::vector<std::vector<double>> arc_tmg_cost;
- std::vector<bool> ignored_nets, tmg_ignored_nets;
- bool arch_state_dirty = false;
- // Our local cell-bel map; that won't be affected by out-of-partition moves
- dict<IdString, BelId> local_cell2bel;
-
- // Data on an inflight move
- dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new)
- // For cluster moves only
- std::vector<std::pair<CellInfo *, Loc>> cell_rel;
- // For incremental wirelength and delay updates
- wirelen_t wirelen_delta = 0;
- double timing_delta = 0;
-
+ ThreadState(Context *ctx, GlobalState &g, int idx) : DetailPlacerThreadState(ctx, g, idx), g(g){};
// Total made and accepted moved
+ GlobalState &g;
int n_move = 0, n_accept = 0;
- enum BoundChange
- {
- NO_CHANGE,
- CELL_MOVED_INWARDS,
- CELL_MOVED_OUTWARDS,
- FULL_RECOMPUTE
- };
- // Wirelen related are handled on a per-axis basis to reduce
- struct AxisChanges
- {
- std::vector<int> bounds_changed_nets;
- std::vector<BoundChange> already_bounds_changed;
- };
- std::array<AxisChanges, 2> axes;
- std::vector<NetBB> new_net_bounds;
-
- std::vector<std::vector<bool>> already_timing_changed;
- std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs;
- std::vector<double> new_timing_costs;
-
- ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){};
- void set_partition(const Partition &part)
- {
- p = part;
- thread_nets.clear();
- thread_net_idx.resize(g.flat_nets.size());
- std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1);
- // Determine the set of nets that are within the thread; and therefore we care about
- for (auto thread_cell : part.cells) {
- for (auto &port : thread_cell->ports) {
- if (!port.second.net)
- continue;
- int global_idx = port.second.net->udata;
- auto &thread_idx = thread_net_idx.at(global_idx);
- // Already added to the set
- if (thread_idx != -1)
- continue;
- thread_idx = thread_nets.size();
- thread_nets.push_back(port.second.net);
- }
- }
- tmg_ignored_nets.clear();
- ignored_nets.clear();
- for (auto tn : thread_nets) {
- ignored_nets.push_back(g.skip_net(tn));
- tmg_ignored_nets.push_back(g.timing_skip_net(tn));
- }
- // Set up the original cell-bel map for all nets inside the thread
- local_cell2bel.clear();
- for (NetInfo *net : thread_nets) {
- if (net->driver.cell)
- local_cell2bel[net->driver.cell->name] = net->driver.cell->bel;
- for (auto &usr : net->users)
- local_cell2bel[usr.cell->name] = usr.cell->bel;
- }
- }
- void setup_initial_state()
- {
- // Setup initial net bounding boxes and timing costs
- net_bounds.clear();
- arc_tmg_cost.clear();
- for (auto tn : thread_nets) {
- net_bounds.push_back(g.last_bounds.at(tn->udata));
- arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata));
- }
- new_net_bounds = net_bounds;
- for (int j = 0; j < 2; j++) {
- auto &a = axes.at(j);
- a.already_bounds_changed.resize(net_bounds.size());
- }
- already_timing_changed.clear();
- already_timing_changed.resize(net_bounds.size());
- for (size_t i = 0; i < thread_nets.size(); i++)
- already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity());
- }
- bool bounds_check(BelId bel)
- {
- Loc l = ctx->getBelLocation(bel);
- if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1)
- return false;
- return true;
- }
- bool bind_move()
- {
- std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- for (auto &entry : moved_cells) {
- ctx->unbindBel(entry.second.first);
- }
- bool success = true;
- for (auto &entry : moved_cells) {
- // Make sure targets are available before we bind them
- if (!ctx->checkBelAvail(entry.second.second)) {
- success = false;
- break;
- }
- ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
- }
- arch_state_dirty = true;
- return success;
- }
- bool check_validity()
- {
- std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- bool result = true;
- for (auto e : moved_cells) {
- if (!ctx->isBelLocationValid(e.second.first)) {
- // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer
- // enabling dedicated routes to be used
- result = false;
- break;
- }
- if (!ctx->isBelLocationValid(e.second.second)) {
- result = false;
- break;
- }
- }
- return result;
- }
- void revert_move()
- {
- if (arch_state_dirty) {
- // If changes to the arch state were made, revert them by restoring original cell bindings
- std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex);
- for (auto &entry : moved_cells) {
- BelId curr_bound = ctx->cells.at(entry.first)->bel;
- if (curr_bound != BelId())
- ctx->unbindBel(curr_bound);
- }
- for (auto &entry : moved_cells) {
- ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK);
- }
- arch_state_dirty = false;
- }
- for (auto &entry : moved_cells)
- local_cell2bel[entry.first] = entry.second.first;
- }
- void commit_move()
- {
- arch_state_dirty = false;
- for (auto &axis : axes) {
- for (auto bc : axis.bounds_changed_nets) {
- // Commit updated net bounds
- net_bounds.at(bc) = new_net_bounds.at(bc);
- }
- }
- if (g.cfg.timing_driven) {
- NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size());
- for (size_t i = 0; i < timing_changed_arcs.size(); i++) {
- auto arc = timing_changed_arcs.at(i);
- arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i);
- }
- }
- }
- void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel)
- {
- Loc new_loc = ctx->getBelLocation(new_bel);
- Loc old_loc = ctx->getBelLocation(old_bel);
- for (const auto &port : cell->ports) {
- NetInfo *pn = port.second.net;
- if (!pn)
- continue;
- int idx = thread_net_idx.at(pn->udata);
- if (ignored_nets.at(idx))
- continue;
- NetBB &new_bounds = new_net_bounds.at(idx);
- // For the x-axis (i=0) and y-axis (i=1)
- for (int i = 0; i < 2; i++) {
- auto &axis = axes.at(i);
- // New and old on this axis
- int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x;
- // References to updated bounding box entries
- auto &b0 = i ? new_bounds.y0 : new_bounds.x0;
- auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0;
- auto &b1 = i ? new_bounds.y1 : new_bounds.x1;
- auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1;
- auto &change = axis.already_bounds_changed.at(idx);
- // Lower bound
- if (new_pos < b0) {
- // Further out than current lower bound
- b0 = new_pos;
- n0 = 1;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (new_pos == b0 && old_pos > b0) {
- // Moved from inside into current bound
- ++n0;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (old_pos == b0 && new_pos > b0) {
- // Moved from current bound to inside
- if (change == NO_CHANGE)
- axis.bounds_changed_nets.push_back(idx);
- if (n0 == 1) {
- // Was the last cell on the bound; have to do a full recompute
- change = FULL_RECOMPUTE;
- } else {
- --n0;
- if (change == NO_CHANGE)
- change = CELL_MOVED_INWARDS;
- }
- }
- // Upper bound
- if (new_pos > b1) {
- // Further out than current upper bound
- b1 = new_pos;
- n1 = new_pos;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (new_pos == b1 && old_pos < b1) {
- // Moved onto current bound
- ++n1;
- if (change == NO_CHANGE) {
- change = CELL_MOVED_OUTWARDS;
- axis.bounds_changed_nets.push_back(idx);
- }
- } else if (old_pos == b1 && new_pos < b1) {
- // Moved from current bound to inside
- if (change == NO_CHANGE)
- axis.bounds_changed_nets.push_back(idx);
- if (n1 == 1) {
- // Was the last cell on the bound; have to do a full recompute
- change = FULL_RECOMPUTE;
- } else {
- --n1;
- if (change == NO_CHANGE)
- change = CELL_MOVED_INWARDS;
- }
- }
- }
- // Timing updates if timing driven
- if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) {
- if (port.second.type == PORT_OUT) {
- int cc;
- TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc);
- if (cls != TMG_IGNORE) {
- for (auto usr : pn->users.enumerate())
- if (!already_timing_changed.at(idx).at(usr.index.idx())) {
- timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index));
- already_timing_changed.at(idx).at(usr.index.idx()) = true;
- }
- }
- } else {
- auto usr = port.second.user_idx;
- if (!already_timing_changed.at(idx).at(usr.idx())) {
- timing_changed_arcs.emplace_back(std::make_pair(idx, usr));
- already_timing_changed.at(idx).at(usr.idx()) = true;
- }
- }
- }
- }
- }
- void compute_total_change()
- {
- auto &xa = axes.at(0), &ya = axes.at(1);
- for (auto &bc : xa.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
- new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
- for (auto &bc : ya.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE &&
- ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE)
- new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel);
- for (auto &bc : xa.bounds_changed_nets)
- wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
- for (auto &bc : ya.bounds_changed_nets)
- if (xa.already_bounds_changed.at(bc) == NO_CHANGE)
- wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg));
- if (g.cfg.timing_driven) {
- NPNR_ASSERT(new_timing_costs.empty());
- for (auto arc : timing_changed_arcs) {
- double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel);
- timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx()));
- new_timing_costs.push_back(new_cost);
- }
- }
- }
- void reset_move_state()
- {
- moved_cells.clear();
- cell_rel.clear();
- for (auto &axis : axes) {
- for (auto bc : axis.bounds_changed_nets) {
- new_net_bounds.at(bc) = net_bounds.at(bc);
- axis.already_bounds_changed[bc] = NO_CHANGE;
- }
- axis.bounds_changed_nets.clear();
- }
- for (auto &arc : timing_changed_arcs) {
- already_timing_changed.at(arc.first).at(arc.second.idx()) = false;
- }
- timing_changed_arcs.clear();
- new_timing_costs.clear();
- wirelen_delta = 0;
- timing_delta = 0;
- }
-
bool accept_move()
{
static constexpr double epsilon = 1e-20;
@@ -553,19 +61,6 @@ struct ThreadState
(g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature));
}
- bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel)
- {
- if (!bounds_check(old_bel) || !bounds_check(new_bel))
- return false;
- if (!ctx->isValidBelForCellType(cell->type, new_bel))
- return false;
- NPNR_ASSERT(!moved_cells.count(cell->name));
- moved_cells[cell->name] = std::make_pair(old_bel, new_bel);
- local_cell2bel[cell->name] = new_bel;
- compute_changes_for_cell(cell, old_bel, new_bel);
- return true;
- }
-
bool single_cell_swap(CellInfo *cell, BelId new_bel)
{
NPNR_ASSERT(moved_cells.empty());
@@ -806,14 +301,14 @@ struct ParallelRefine
g.bels.addCellType(cell_type);
}
};
- std::vector<Partition> parts;
+ std::vector<PlacePartition> parts;
void do_partition()
{
parts.clear();
parts.emplace_back(ctx);
bool yaxis = false;
while (parts.size() < t.size()) {
- std::vector<Partition> next(parts.size() * 2);
+ std::vector<PlacePartition> next(parts.size() * 2);
for (size_t i = 0; i < parts.size(); i++) {
// Randomly permute pivot every iteration so we get different thread boundaries
const float delta = 0.1;
@@ -834,28 +329,6 @@ struct ParallelRefine
w.join();
}
- void update_global_costs()
- {
- g.last_bounds.resize(g.flat_nets.size());
- g.last_tmg_costs.resize(g.flat_nets.size());
- g.total_wirelen = 0;
- g.total_timing_cost = 0;
- for (size_t i = 0; i < g.flat_nets.size(); i++) {
- NetInfo *ni = g.flat_nets.at(i);
- if (g.skip_net(ni))
- continue;
- g.last_bounds.at(i) = NetBB::compute(ctx, ni);
- g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg);
- if (!g.timing_skip_net(ni)) {
- auto &tc = g.last_tmg_costs.at(i);
- tc.resize(ni->users.capacity());
- for (auto usr : ni->users.enumerate()) {
- tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index);
- g.total_timing_cost += tc.at(usr.index.idx());
- }
- }
- }
- }
void run()
{
@@ -868,7 +341,7 @@ struct ParallelRefine
log_info("Running parallel refinement with %d threads.\n", int(t.size()));
int iter = 1;
bool done = false;
- update_global_costs();
+ g.update_global_costs();
double avg_wirelen = g.total_wirelen;
wirelen_t min_wirelen = g.total_wirelen;
while (true) {
@@ -914,7 +387,7 @@ struct ParallelRefine
for (auto &w : workers)
w.join();
g.tmg.run();
- update_global_costs();
+ g.update_global_costs();
iter++;
ctx->yield();
}
@@ -924,17 +397,14 @@ struct ParallelRefine
};
} // namespace
-ParallelRefineCfg::ParallelRefineCfg(Context *ctx)
+ParallelRefineCfg::ParallelRefineCfg(Context *ctx) : DetailPlaceCfg(ctx)
{
- timing_driven = ctx->setting<bool>("timing_driven");
threads = ctx->setting<int>("threads", 8);
// snap to nearest power of two; and minimum thread size
int actual_threads = 1;
while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size)
actual_threads *= 2;
threads = actual_threads;
- hpwl_scale_x = 1;
- hpwl_scale_y = 1;
}
bool parallel_refine(Context *ctx, ParallelRefineCfg cfg)
diff --git a/common/place/parallel_refine.h b/common/place/parallel_refine.h
index 556317cd..a6c8b469 100644
--- a/common/place/parallel_refine.h
+++ b/common/place/parallel_refine.h
@@ -20,18 +20,16 @@
#ifndef PARALLEL_REFINE_H
#define PARALLEL_REFINE_H
+#include "detail_place_cfg.h"
#include "nextpnr.h"
NEXTPNR_NAMESPACE_BEGIN
-struct ParallelRefineCfg
+struct ParallelRefineCfg : DetailPlaceCfg
{
ParallelRefineCfg(Context *ctx);
- bool timing_driven;
int threads;
- int hpwl_scale_x, hpwl_scale_y;
double lambda = 0.5f;
- float crit_exp = 8;
int inner_iters = 15;
int min_thread_size = 500;
};