diff options
45 files changed, 2352 insertions, 461 deletions
diff --git a/common/command.cc b/common/command.cc index d4279a58..56327847 100644 --- a/common/command.cc +++ b/common/command.cc @@ -109,6 +109,7 @@ po::options_description CommandHandler::getGeneralOptions() general.add_options()("debug", "debug output"); general.add_options()("debug-placer", "debug output from placer only"); general.add_options()("debug-router", "debug output from router only"); + general.add_options()("threads", po::value<int>(), "number of threads for passes where this is configurable"); general.add_options()("force,f", "keep running after errors"); #ifndef NO_GUI @@ -173,6 +174,8 @@ po::options_description CommandHandler::getGeneralOptions() "placer heap criticality exponent (int, default: 2)"); general.add_options()("placer-heap-timingweight", po::value<int>(), "placer heap timing weight (int, default: 10)"); + general.add_options()("parallel-refine", "use new experimental parallelised engine for placement refinement"); + general.add_options()("router2-heatmap", po::value<std::string>(), "prefix for router2 resource congestion heatmaps"); @@ -225,6 +228,10 @@ void CommandHandler::setupContext(Context *ctx) ctx->rngseed(vm["seed"].as<int>()); } + if (vm.count("threads")) { + ctx->settings[ctx->id("threads")] = vm["threads"].as<int>(); + } + if (vm.count("randomize-seed")) { std::random_device randDev{}; std::uniform_int_distribution<int> distrib{1}; @@ -298,6 +305,10 @@ void CommandHandler::setupContext(Context *ctx) if (vm.count("placer-heap-timingweight")) ctx->settings[ctx->id("placerHeap/timingWeight")] = std::to_string(vm["placer-heap-timingweight"].as<int>()); + + if (vm.count("parallel-refine")) + ctx->settings[ctx->id("placerHeap/parallelRefine")] = true; + if (vm.count("router2-heatmap")) ctx->settings[ctx->id("router2/heatmap")] = vm["router2-heatmap"].as<std::string>(); if (vm.count("tmg-ripup") || vm.count("router2-tmg-ripup")) diff --git a/common/context.cc b/common/context.cc index faddf825..e35d3e49 100644 --- a/common/context.cc +++ b/common/context.cc @@ -334,13 +334,13 @@ void Context::check() const nameOf(port.first), nameOf(net)); } } else if (port.second.type == PORT_IN) { - int usr_count = std::count_if(net->users.begin(), net->users.end(), [&](const PortRef &pr) { - return pr.cell == c.second.get() && pr.port == port.first; - }); - if (usr_count != 1) - CHECK_FAIL("input cell port '%s.%s' appears %d rather than expected 1 times in users vector of " - "net '%s'\n", - nameOf(c.first), nameOf(port.first), usr_count, nameOf(net)); + if (!port.second.user_idx) + CHECK_FAIL("input cell port '%s.%s' on net '%s' has no user index\n", nameOf(c.first), + nameOf(port.first), nameOf(net)); + auto net_user = net->users.at(port.second.user_idx); + if (net_user.cell != c.second.get() || net_user.port != port.first) + CHECK_FAIL("input cell port '%s.%s' not in associated user entry of net '%s'\n", + nameOf(c.first), nameOf(port.first), nameOf(net)); } } } diff --git a/common/design_utils.h b/common/design_utils.h index 63cb71d7..069600b5 100644 --- a/common/design_utils.h +++ b/common/design_utils.h @@ -47,14 +47,18 @@ CellInfo *net_only_drives(const Context *ctx, NetInfo *net, F1 cell_pred, IdStri return nullptr; if (exclusive) { if (exclude == nullptr) { - if (net->users.size() != 1) + if (net->users.entries() != 1) return nullptr; } else { - if (net->users.size() > 2) { + if (net->users.entries() > 2) { return nullptr; - } else if (net->users.size() == 2) { - if (std::find_if(net->users.begin(), net->users.end(), - [exclude](const PortRef &ref) { return ref.cell == exclude; }) == net->users.end()) + } else if (net->users.entries() == 2) { + bool found = false; + for (auto &usr : net->users) { + if (usr.cell == exclude) + found = true; + } + if (!found) return nullptr; } } diff --git a/common/indexed_store.h b/common/indexed_store.h new file mode 100644 index 00000000..df607c13 --- /dev/null +++ b/common/indexed_store.h @@ -0,0 +1,297 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021-22 gatecat <gatecat@ds0.me> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef INDEXED_STORE_H +#define INDEXED_STORE_H + +#include <algorithm> +#include <limits> +#include <type_traits> +#include <vector> + +#include "nextpnr_assertions.h" +#include "nextpnr_namespaces.h" + +NEXTPNR_NAMESPACE_BEGIN + +template <typename T> struct store_index +{ + int32_t m_index = -1; + store_index() = default; + explicit store_index(int32_t index) : m_index(index){}; + int32_t idx() const { return m_index; } + void set(int32_t index) { m_index = index; } + bool empty() const { return m_index == -1; } + bool operator==(const store_index<T> &other) const { return m_index == other.m_index; } + bool operator!=(const store_index<T> &other) const { return m_index != other.m_index; } + bool operator<(const store_index<T> &other) const { return m_index < other.m_index; } + unsigned int hash() const { return m_index; } + + operator bool() const { return !empty(); } + operator int() const = delete; + bool operator!() const { return empty(); } +}; + +// "Slotted" indexed object store +template <typename T> class indexed_store +{ + private: + // This should move to using std::optional at some point + class slot + { + private: + alignas(T) unsigned char storage[sizeof(T)]; + int32_t next_free; + bool active; + inline T &obj() { return reinterpret_cast<T &>(storage); } + inline const T &obj() const { return reinterpret_cast<const T &>(storage); } + friend class indexed_store<T>; + + public: + slot() : next_free(std::numeric_limits<int32_t>::max()), active(false){}; + slot(slot &&other) : next_free(other.next_free), active(other.active) + { + if (active) + ::new (static_cast<void *>(&storage)) T(std::move(other.obj())); + }; + + slot(const slot &other) : next_free(other.next_free), active(other.active) + { + if (active) + ::new (static_cast<void *>(&storage)) T(other.obj()); + }; + + template <class... Args> void create(Args &&...args) + { + NPNR_ASSERT(!active); + active = true; + ::new (static_cast<void *>(&storage)) T(std::forward<Args &&>(args)...); + } + bool empty() const { return !active; } + T &get() + { + NPNR_ASSERT(active); + return reinterpret_cast<T &>(storage); + } + const T &get() const + { + NPNR_ASSERT(active); + return reinterpret_cast<const T &>(storage); + } + void free(int32_t first_free) + { + NPNR_ASSERT(active); + obj().~T(); + active = false; + next_free = first_free; + } + ~slot() + { + if (active) + obj().~T(); + } + }; + + std::vector<slot> slots; + int32_t first_free = 0; + int32_t active_count = 0; + + public: + // Create a new entry and return its index + template <class... Args> store_index<T> add(Args &&...args) + { + ++active_count; + if (first_free == int32_t(slots.size())) { + slots.emplace_back(); + slots.back().create(std::forward<Args &&>(args)...); + ++first_free; + return store_index<T>(int32_t(slots.size()) - 1); + } else { + int32_t idx = first_free; + auto &slot = slots.at(idx); + first_free = slot.next_free; + slot.create(std::forward<Args &&>(args)...); + return store_index<T>(idx); + } + } + + // Remove an entry at an index + void remove(store_index<T> idx) + { + --active_count; + slots.at(idx.m_index).free(first_free); + first_free = idx.m_index; + } + + void clear() + { + active_count = 0; + first_free = 0; + slots.clear(); + } + + // Number of live entries + int32_t entries() const { return active_count; } + bool empty() const { return (entries() == 0); } + + // Reserve a certain amount of space + void reserve(int32_t size) { slots.reserve(size); } + + // Check if an index exists + int32_t count(store_index<T> idx) + { + if (idx.m_index < 0 || idx.m_index >= int32_t(slots.size())) + return 0; + return slots.at(idx.m_index).empty() ? 0 : 1; + } + + // Get an item by index + T &at(store_index<T> idx) { return slots.at(idx.m_index).get(); } + const T &at(store_index<T> idx) const { return slots.at(idx.m_index).get(); } + T &operator[](store_index<T> idx) { return slots.at(idx.m_index).get(); } + const T &operator[](store_index<T> idx) const { return slots.at(idx.m_index).get(); } + + // Total size of the container + int32_t capacity() const { return int32_t(slots.size()); } + + // Iterate over items + template <typename It, typename S> class enumerated_iterator; + + class iterator + { + private: + indexed_store *base; + int32_t index = 0; + + public: + iterator(indexed_store *base, int32_t index) : base(base), index(index){}; + inline bool operator!=(const iterator &other) const { return other.index != index; } + inline bool operator==(const iterator &other) const { return other.index == index; } + inline iterator operator++() + { + // skip over unused slots + do { + index++; + } while (index < int32_t(base->slots.size()) && !base->slots.at(index).active); + return *this; + } + inline iterator operator++(int) + { + iterator prior(*this); + do { + index++; + } while (index < int32_t(base->slots.size()) && !base->slots.at(index).active); + return prior; + } + T &operator*() { return base->at(store_index<T>(index)); } + template <typename It, typename S> friend class indexed_store::enumerated_iterator; + }; + iterator begin() + { + auto it = iterator{this, -1}; + ++it; + return it; + } + iterator end() { return iterator{this, int32_t(slots.size())}; } + + class const_iterator + { + private: + const indexed_store *base; + int32_t index = 0; + + public: + const_iterator(const indexed_store *base, int32_t index) : base(base), index(index){}; + inline bool operator!=(const const_iterator &other) const { return other.index != index; } + inline bool operator==(const const_iterator &other) const { return other.index == index; } + inline const_iterator operator++() + { + // skip over unused slots + do { + index++; + } while (index < int32_t(base->slots.size()) && !base->slots.at(index).active); + return *this; + } + inline const_iterator operator++(int) + { + iterator prior(*this); + do { + index++; + } while (index < int32_t(base->slots.size()) && !base->slots.at(index).active); + return prior; + } + const T &operator*() { return base->at(store_index<T>(index)); } + template <typename It, typename S> friend class indexed_store::enumerated_iterator; + }; + const_iterator begin() const + { + auto it = const_iterator{this, -1}; + ++it; + return it; + } + const_iterator end() const { return const_iterator{this, int32_t(slots.size())}; } + + template <typename S> struct enumerated_item + { + enumerated_item(int32_t index, T &value) : index(index), value(value){}; + store_index<std::remove_cv_t<S>> index; + S &value; + }; + + template <typename It, typename S> class enumerated_iterator + { + private: + It base; + + public: + enumerated_iterator(const It &base) : base(base){}; + inline bool operator!=(const enumerated_iterator<It, S> &other) const { return other.base != base; } + inline bool operator==(const enumerated_iterator<It, S> &other) const { return other.base == base; } + inline enumerated_iterator<It, S> operator++() + { + ++base; + return *this; + } + inline enumerated_iterator<It, S> operator++(int) + { + iterator prior(*this); + ++base; + return prior; + } + enumerated_item<S> operator*() { return enumerated_item<S>{base.index, *base}; } + }; + + template <typename It, typename S> struct enumerated_range + { + enumerated_range(const It &begin, const It &end) : m_begin(begin), m_end(end){}; + enumerated_iterator<It, S> m_begin, m_end; + enumerated_iterator<It, S> begin() { return m_begin; } + enumerated_iterator<It, S> end() { return m_end; } + }; + + enumerated_range<iterator, T> enumerate() { return enumerated_range<iterator, T>{begin(), end()}; } + enumerated_range<const_iterator, const T> enumerate() const + { + return enumerated_range<iterator, T>{begin(), end()}; + } +}; + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/nextpnr_types.cc b/common/nextpnr_types.cc index c89a0071..57d816c0 100644 --- a/common/nextpnr_types.cc +++ b/common/nextpnr_types.cc @@ -66,7 +66,7 @@ void CellInfo::connectPort(IdString port_name, NetInfo *net) PortRef user; user.cell = this; user.port = port_name; - net->users.push_back(user); + port.user_idx = net->users.add(user); } else { NPNR_ASSERT_FALSE("invalid port type for connect_port"); } @@ -78,11 +78,8 @@ void CellInfo::disconnectPort(IdString port_name) return; PortInfo &port = ports.at(port_name); if (port.net != nullptr) { - port.net->users.erase(std::remove_if(port.net->users.begin(), port.net->users.end(), - [this, port_name](const PortRef &user) { - return user.cell == this && user.port == port_name; - }), - port.net->users.end()); + if (port.user_idx) + port.net->users.remove(port.user_idx); if (port.net->driver.cell == this && port.net->driver.port == port_name) port.net->driver.cell = nullptr; port.net = nullptr; @@ -116,7 +113,9 @@ void CellInfo::movePortTo(IdString port, CellInfo *other, IdString other_port) NPNR_ASSERT(old.type == rep.type); rep.net = old.net; + rep.user_idx = old.user_idx; old.net = nullptr; + old.user_idx = store_index<PortRef>{}; if (rep.type == PORT_OUT) { if (rep.net != nullptr) { rep.net->driver.cell = other; @@ -124,12 +123,9 @@ void CellInfo::movePortTo(IdString port, CellInfo *other, IdString other_port) } } else if (rep.type == PORT_IN) { if (rep.net != nullptr) { - for (PortRef &load : rep.net->users) { - if (load.cell == this && load.port == port) { - load.cell = other; - load.port = other_port; - } - } + auto &load = rep.net->users.at(rep.user_idx); + load.cell = other; + load.port = other_port; } } else { NPNR_ASSERT(false); @@ -144,9 +140,8 @@ void CellInfo::renamePort(IdString old_name, IdString new_name) if (pi.net != nullptr) { if (pi.net->driver.cell == this && pi.net->driver.port == old_name) pi.net->driver.port = new_name; - for (auto &usr : pi.net->users) - if (usr.cell == this && usr.port == old_name) - usr.port = new_name; + if (pi.user_idx) + pi.net->users.at(pi.user_idx).port = new_name; } ports.erase(old_name); pi.name = new_name; diff --git a/common/nextpnr_types.h b/common/nextpnr_types.h index cf93a071..c21182cc 100644 --- a/common/nextpnr_types.h +++ b/common/nextpnr_types.h @@ -29,6 +29,7 @@ #include "archdefs.h" #include "hashlib.h" +#include "indexed_store.h" #include "nextpnr_base_types.h" #include "nextpnr_namespaces.h" #include "property.h" @@ -129,7 +130,7 @@ struct NetInfo : ArchNetInfo int32_t udata = 0; PortRef driver; - std::vector<PortRef> users; + indexed_store<PortRef> users; dict<IdString, Property> attrs; // wire -> uphill_pip @@ -154,6 +155,7 @@ struct PortInfo IdString name; NetInfo *net; PortType type; + store_index<PortRef> user_idx{}; }; struct Context; diff --git a/common/parallel_refine.cc b/common/parallel_refine.cc new file mode 100644 index 00000000..ea97b852 --- /dev/null +++ b/common/parallel_refine.cc @@ -0,0 +1,946 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021-22 gatecat <gatecat@ds0.me> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "parallel_refine.h" +#include "fast_bels.h" +#include "log.h" +#include "timing.h" +#include "scope_lock.h" + +#include <chrono> +#include <mutex> +#include <queue> +#include <shared_mutex> +#include <thread> + +NEXTPNR_NAMESPACE_BEGIN + +namespace { +struct Partition +{ + int x0, y0, x1, y1; + std::vector<CellInfo *> cells; + Partition() = default; + explicit Partition(Context *ctx) + { + x0 = ctx->getGridDimX(); + y0 = ctx->getGridDimY(); + x1 = 0; + y1 = 0; + for (auto &cell : ctx->cells) { + Loc l = ctx->getBelLocation(cell.second->bel); + x0 = std::min(x0, l.x); + x1 = std::max(x1, l.x); + y0 = std::min(y0, l.y); + y1 = std::max(y1, l.y); + cells.push_back(cell.second.get()); + } + } + void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r) + { + std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) { + Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel); + return yaxis ? (l0.y < l1.y) : (l0.x < l1.x); + }); + size_t pivot_point = size_t(cells.size() * pivot); + l.cells.clear(); + r.cells.clear(); + l.cells.reserve(pivot_point); + r.cells.reserve(cells.size() - pivot_point); + int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1) + : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y + : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x); + for (size_t i = 0; i < cells.size(); i++) { + Loc loc = ctx->getBelLocation(cells.at(i)->bel); + ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i)); + } + if (yaxis) { + l.x0 = r.x0 = x0; + l.x1 = r.x1 = x1; + l.y0 = y0; + l.y1 = pivot_coord; + r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1); + r.y1 = y1; + } else { + l.y0 = r.y0 = y0; + l.y1 = r.y1 = y1; + l.x0 = x0; + l.x1 = pivot_coord; + r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1); + r.x1 = x1; + } + } +}; + +typedef int64_t wirelen_t; + +struct NetBB +{ + // Actual bounding box + int x0 = 0, x1 = 0, y0 = 0, y1 = 0; + // Number of cells at each extremity + int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; + wirelen_t hpwl(const ParallelRefineCfg &cfg) const + { + return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); + } + static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr) + { + NetBB result{}; + if (!net->driver.cell) + return result; + auto bel_loc = [&](const CellInfo *cell) { + BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel; + return ctx->getBelLocation(bel); + }; + result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1; + Loc drv_loc = bel_loc(net->driver.cell); + result.x0 = result.x1 = drv_loc.x; + result.y0 = result.y1 = drv_loc.y; + for (auto &usr : net->users) { + Loc l = bel_loc(usr.cell); + if (l.x == result.x0) + ++result.nx0; // on the edge + else if (l.x < result.x0) { + result.x0 = l.x; // extends the edge + result.nx0 = 1; + } + if (l.x == result.x1) + ++result.nx1; // on the edge + else if (l.x > result.x1) { + result.x1 = l.x; // extends the edge + result.nx1 = 1; + } + if (l.y == result.y0) + ++result.ny0; // on the edge + else if (l.y < result.y0) { + result.y0 = l.y; // extends the edge + result.ny0 = 1; + } + if (l.y == result.y1) + ++result.ny1; // on the edge + else if (l.y > result.y1) { + result.y1 = l.y; // extends the edge + result.ny1 = 1; + } + } + return result; + } +}; + +struct GlobalState +{ + explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){}; + Context *ctx; + ParallelRefineCfg cfg; + FastBels bels; + std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index + std::vector<NetBB> last_bounds; + std::vector<std::vector<double>> last_tmg_costs; + dict<IdString, NetBB> region_bounds; + TimingAnalyser tmg; + + std::shared_timed_mutex archapi_mutex; + + double temperature = 1e-7; + int radius = 3; + + wirelen_t total_wirelen = 0; + double total_timing_cost = 0; + + double get_timing_cost(const NetInfo *net, store_index<PortRef> user, + const dict<IdString, BelId> *cell2bel = nullptr) + { + if (!net->driver.cell) + return 0; + const auto &sink = net->users.at(user); + IdString driver_pin, sink_pin; + // Pick the first pin for a prediction; assume all will be similar enouhg + for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) { + driver_pin = pin; + break; + } + for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) { + sink_pin = pin; + break; + } + float crit = tmg.get_criticality(CellPortKey(sink)); + BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel; + BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel; + double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin)); + return delay * std::pow(crit, cfg.crit_exp); + } + + bool skip_net(const NetInfo *net) const + { + if (!net->driver.cell) + return true; + if (ctx->getBelGlobalBuf(net->driver.cell->bel)) + return true; + return false; + } + bool timing_skip_net(const NetInfo *net) const + { + if (!net->driver.cell) + return true; + int cc; + auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc); + if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK) + return true; + return false; + } + // .... +}; + +struct ThreadState +{ + Context *ctx; // Nextpnr context pointer + GlobalState &g; // Refinement engine state + int idx; // Index of the thread + DeterministicRNG rng; // Local RNG + // The cell partition that the thread works on + Partition p; + // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can + // optimise + std::vector<int> thread_net_idx; + // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's + // perspective + std::vector<NetInfo *> thread_nets; + std::vector<NetBB> net_bounds; + std::vector<std::vector<double>> arc_tmg_cost; + std::vector<bool> ignored_nets, tmg_ignored_nets; + bool arch_state_dirty = false; + // Our local cell-bel map; that won't be affected by out-of-partition moves + dict<IdString, BelId> local_cell2bel; + + // Data on an inflight move + dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new) + // For cluster moves only + std::vector<std::pair<CellInfo *, Loc>> cell_rel; + // For incremental wirelength and delay updates + wirelen_t wirelen_delta = 0; + double timing_delta = 0; + + // Total made and accepted moved + int n_move = 0, n_accept = 0; + + enum BoundChange + { + NO_CHANGE, + CELL_MOVED_INWARDS, + CELL_MOVED_OUTWARDS, + FULL_RECOMPUTE + }; + // Wirelen related are handled on a per-axis basis to reduce + struct AxisChanges + { + std::vector<int> bounds_changed_nets; + std::vector<BoundChange> already_bounds_changed; + }; + std::array<AxisChanges, 2> axes; + std::vector<NetBB> new_net_bounds; + + std::vector<std::vector<bool>> already_timing_changed; + std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs; + std::vector<double> new_timing_costs; + + ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){}; + void set_partition(const Partition &part) + { + p = part; + thread_nets.clear(); + thread_net_idx.resize(g.flat_nets.size()); + std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1); + // Determine the set of nets that are within the thread; and therefore we care about + for (auto thread_cell : part.cells) { + for (auto &port : thread_cell->ports) { + if (!port.second.net) + continue; + int global_idx = port.second.net->udata; + auto &thread_idx = thread_net_idx.at(global_idx); + // Already added to the set + if (thread_idx != -1) + continue; + thread_idx = thread_nets.size(); + thread_nets.push_back(port.second.net); + } + } + tmg_ignored_nets.clear(); + ignored_nets.clear(); + for (auto tn : thread_nets) { + ignored_nets.push_back(g.skip_net(tn)); + tmg_ignored_nets.push_back(g.timing_skip_net(tn)); + } + // Set up the original cell-bel map for all nets inside the thread + local_cell2bel.clear(); + for (NetInfo *net : thread_nets) { + if (net->driver.cell) + local_cell2bel[net->driver.cell->name] = net->driver.cell->bel; + for (auto &usr : net->users) + local_cell2bel[usr.cell->name] = usr.cell->bel; + } + } + void setup_initial_state() + { + // Setup initial net bounding boxes and timing costs + net_bounds.clear(); + arc_tmg_cost.clear(); + for (auto tn : thread_nets) { + net_bounds.push_back(g.last_bounds.at(tn->udata)); + arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata)); + } + new_net_bounds = net_bounds; + for (int j = 0; j < 2; j++) { + auto &a = axes.at(j); + a.already_bounds_changed.resize(net_bounds.size()); + } + already_timing_changed.clear(); + already_timing_changed.resize(net_bounds.size()); + for (size_t i = 0; i < thread_nets.size(); i++) + already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity()); + } + bool bounds_check(BelId bel) + { + Loc l = ctx->getBelLocation(bel); + if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1) + return false; + return true; + } + bool bind_move() + { + std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); + for (auto &entry : moved_cells) { + ctx->unbindBel(entry.second.first); + } + bool success = true; + for (auto &entry : moved_cells) { + // Make sure targets are available before we bind them + if (!ctx->checkBelAvail(entry.second.second)) { + success = false; + break; + } + ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); + } + arch_state_dirty = true; + return success; + } + bool check_validity() + { + std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex); + bool result = true; + for (auto e : moved_cells) { + if (!ctx->isBelLocationValid(e.second.first)) { + // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer + // enabling dedicated routes to be used + result = false; + break; + } + if (!ctx->isBelLocationValid(e.second.second)) { + result = false; + break; + } + } + return result; + } + void revert_move() + { + if (arch_state_dirty) { + // If changes to the arch state were made, revert them by restoring original cell bindings + std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); + for (auto &entry : moved_cells) { + BelId curr_bound = ctx->cells.at(entry.first)->bel; + if (curr_bound != BelId()) + ctx->unbindBel(curr_bound); + } + for (auto &entry : moved_cells) { + ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); + } + arch_state_dirty = false; + } + for (auto &entry : moved_cells) + local_cell2bel[entry.first] = entry.second.first; + } + void commit_move() + { + arch_state_dirty = false; + for (auto &axis : axes) { + for (auto bc : axis.bounds_changed_nets) { + // Commit updated net bounds + net_bounds.at(bc) = new_net_bounds.at(bc); + } + } + if (g.cfg.timing_driven) { + NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size()); + for (size_t i = 0; i < timing_changed_arcs.size(); i++) { + auto arc = timing_changed_arcs.at(i); + arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i); + } + } + } + void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel) + { + Loc new_loc = ctx->getBelLocation(new_bel); + Loc old_loc = ctx->getBelLocation(old_bel); + for (const auto &port : cell->ports) { + NetInfo *pn = port.second.net; + if (!pn) + continue; + int idx = thread_net_idx.at(pn->udata); + if (ignored_nets.at(idx)) + continue; + NetBB &new_bounds = new_net_bounds.at(idx); + // For the x-axis (i=0) and y-axis (i=1) + for (int i = 0; i < 2; i++) { + auto &axis = axes.at(i); + // New and old on this axis + int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x; + // References to updated bounding box entries + auto &b0 = i ? new_bounds.y0 : new_bounds.x0; + auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0; + auto &b1 = i ? new_bounds.y1 : new_bounds.x1; + auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1; + auto &change = axis.already_bounds_changed.at(idx); + // Lower bound + if (new_pos < b0) { + // Further out than current lower bound + b0 = new_pos; + n0 = 1; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (new_pos == b0 && old_pos > b0) { + // Moved from inside into current bound + ++n0; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (old_pos == b0 && new_pos > b0) { + // Moved from current bound to inside + if (change == NO_CHANGE) + axis.bounds_changed_nets.push_back(idx); + if (n0 == 1) { + // Was the last cell on the bound; have to do a full recompute + change = FULL_RECOMPUTE; + } else { + --n0; + if (change == NO_CHANGE) + change = CELL_MOVED_INWARDS; + } + } + // Upper bound + if (new_pos > b1) { + // Further out than current upper bound + b1 = new_pos; + n1 = new_pos; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (new_pos == b1 && old_pos < b1) { + // Moved onto current bound + ++n1; + if (change == NO_CHANGE) { + change = CELL_MOVED_OUTWARDS; + axis.bounds_changed_nets.push_back(idx); + } + } else if (old_pos == b1 && new_pos < b1) { + // Moved from current bound to inside + if (change == NO_CHANGE) + axis.bounds_changed_nets.push_back(idx); + if (n1 == 1) { + // Was the last cell on the bound; have to do a full recompute + change = FULL_RECOMPUTE; + } else { + --n1; + if (change == NO_CHANGE) + change = CELL_MOVED_INWARDS; + } + } + } + // Timing updates if timing driven + if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) { + if (port.second.type == PORT_OUT) { + int cc; + TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); + if (cls != TMG_IGNORE) { + for (auto usr : pn->users.enumerate()) + if (!already_timing_changed.at(idx).at(usr.index.idx())) { + timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index)); + already_timing_changed.at(idx).at(usr.index.idx()) = true; + } + } + } else { + auto usr = port.second.user_idx; + if (!already_timing_changed.at(idx).at(usr.idx())) { + timing_changed_arcs.emplace_back(std::make_pair(idx, usr)); + already_timing_changed.at(idx).at(usr.idx()) = true; + } + } + } + } + } + void compute_total_change() + { + auto &xa = axes.at(0), &ya = axes.at(1); + for (auto &bc : xa.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE) + new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); + for (auto &bc : ya.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && + ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE) + new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); + for (auto &bc : xa.bounds_changed_nets) + wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); + for (auto &bc : ya.bounds_changed_nets) + if (xa.already_bounds_changed.at(bc) == NO_CHANGE) + wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); + if (g.cfg.timing_driven) { + NPNR_ASSERT(new_timing_costs.empty()); + for (auto arc : timing_changed_arcs) { + double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel); + timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx())); + new_timing_costs.push_back(new_cost); + } + } + } + void reset_move_state() + { + moved_cells.clear(); + cell_rel.clear(); + for (auto &axis : axes) { + for (auto bc : axis.bounds_changed_nets) { + new_net_bounds.at(bc) = net_bounds.at(bc); + axis.already_bounds_changed[bc] = NO_CHANGE; + } + axis.bounds_changed_nets.clear(); + } + for (auto &arc : timing_changed_arcs) { + already_timing_changed.at(arc.first).at(arc.second.idx()) = false; + } + timing_changed_arcs.clear(); + new_timing_costs.clear(); + wirelen_delta = 0; + timing_delta = 0; + } + + bool accept_move() + { + double delta = g.cfg.lambda * (timing_delta / std::max<double>(epsilon, g.total_timing_cost)) + + (1.0 - g.cfg.lambda) * (double(wirelen_delta) / std::max<double>(epsilon, g.total_wirelen)); + return delta < 0 || + (g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature)); + } + + bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel) + { + if (!bounds_check(old_bel) || !bounds_check(new_bel)) + return false; + if (!ctx->isValidBelForCellType(cell->type, new_bel)) + return false; + NPNR_ASSERT(!moved_cells.count(cell->name)); + moved_cells[cell->name] = std::make_pair(old_bel, new_bel); + local_cell2bel[cell->name] = new_bel; + compute_changes_for_cell(cell, old_bel, new_bel); + return true; + } + + static constexpr double epsilon = 1e-20; + bool single_cell_swap(CellInfo *cell, BelId new_bel) + { + NPNR_ASSERT(moved_cells.empty()); + BelId old_bel = cell->bel; + CellInfo *bound = ctx->getBoundBelCell(new_bel); + if (bound && (bound->belStrength > STRENGTH_STRONG || bound->cluster != ClusterId())) + return false; + if (!add_to_move(cell, old_bel, new_bel)) + goto fail; + if (bound && !add_to_move(bound, new_bel, old_bel)) + goto fail; + compute_total_change(); + // SA acceptance criteria + + if (!accept_move()) { + // SA fail + goto fail; + } + // Check validity rules + if (!bind_move()) + goto fail; + if (!check_validity()) + goto fail; + // Accepted! + commit_move(); + reset_move_state(); + return true; + fail: + revert_move(); + reset_move_state(); + return false; + } + + bool chain_swap(CellInfo *root_cell, BelId new_root_bel) + { + NPNR_ASSERT(moved_cells.empty()); + std::queue<std::pair<ClusterId, BelId>> displaced_clusters; + pool<BelId> used_bels; + displaced_clusters.emplace(root_cell->cluster, new_root_bel); + while (!displaced_clusters.empty()) { + std::vector<std::pair<CellInfo *, BelId>> dest_bels; + auto cursor = displaced_clusters.front(); + displaced_clusters.pop(); + if (!ctx->getClusterPlacement(cursor.first, cursor.second, dest_bels)) + goto fail; + for (const auto &db : dest_bels) { + BelId old_bel = db.first->bel; + if (moved_cells.count(db.first->name)) + goto fail; + if (!add_to_move(db.first, old_bel, db.second)) + goto fail; + if (used_bels.count(db.second)) + goto fail; + used_bels.insert(db.second); + + CellInfo *bound = ctx->getBoundBelCell(db.second); + if (bound) { + if (moved_cells.count(bound->name)) { + // Don't move a cell multiple times in the same go + goto fail; + } else if (bound->belStrength > STRENGTH_STRONG) { + goto fail; + } else if (bound->cluster != ClusterId()) { + // Displace the entire cluster + Loc old_loc = ctx->getBelLocation(old_bel); + Loc bound_loc = ctx->getBelLocation(bound->bel); + Loc root_loc = ctx->getBelLocation(ctx->getClusterRootCell(bound->cluster)->bel); + BelId new_root = ctx->getBelByLocation(Loc(old_loc.x + (root_loc.x - bound_loc.x), + old_loc.y + (root_loc.y - bound_loc.y), + old_loc.z + (root_loc.z - bound_loc.z))); + if (new_root == BelId()) + goto fail; + displaced_clusters.emplace(bound->cluster, new_root); + } else { + // Single cell swap + if (used_bels.count(old_bel)) + goto fail; + used_bels.insert(old_bel); + if (!add_to_move(bound, bound->bel, old_bel)) + goto fail; + } + } else if (!ctx->checkBelAvail(db.second)) { + goto fail; + } + } + } + compute_total_change(); + // SA acceptance criteria + + if (!accept_move()) { + // SA fail + goto fail; + } + // Check validity rules + if (!bind_move()) + goto fail; + if (!check_validity()) + goto fail; + // Accepted! + commit_move(); + reset_move_state(); + return true; + fail: + revert_move(); + reset_move_state(); + return false; + } + + BelId random_bel_for_cell(CellInfo *cell, int force_z = -1) + { + IdString targetType = cell->type; + Loc curr_loc = ctx->getBelLocation(cell->bel); + int count = 0; + + int dx = g.radius, dy = g.radius; + if (cell->region != nullptr && cell->region->constr_bels) { + dx = std::min(g.cfg.hpwl_scale_x * g.radius, + (g.region_bounds[cell->region->name].x1 - g.region_bounds[cell->region->name].x0) + 1); + dy = std::min(g.cfg.hpwl_scale_y * g.radius, + (g.region_bounds[cell->region->name].y1 - g.region_bounds[cell->region->name].y0) + 1); + // Clamp location to within bounds + curr_loc.x = std::max(g.region_bounds[cell->region->name].x0, curr_loc.x); + curr_loc.x = std::min(g.region_bounds[cell->region->name].x1, curr_loc.x); + curr_loc.y = std::max(g.region_bounds[cell->region->name].y0, curr_loc.y); + curr_loc.y = std::min(g.region_bounds[cell->region->name].y1, curr_loc.y); + } + + FastBels::FastBelsData *bel_data; + auto type_cnt = g.bels.getBelsForCellType(targetType, &bel_data); + + while (true) { + int nx = rng.rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0); + int ny = rng.rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0); + if (type_cnt < 64) + nx = ny = 0; + if (nx >= int(bel_data->size())) + continue; + if (ny >= int(bel_data->at(nx).size())) + continue; + const auto &fb = bel_data->at(nx).at(ny); + if (fb.size() == 0) + continue; + BelId bel = fb.at(rng.rng(int(fb.size()))); + if (!bounds_check(bel)) + continue; + if (force_z != -1) { + Loc loc = ctx->getBelLocation(bel); + if (loc.z != force_z) + continue; + } + if (!cell->testRegion(bel)) + continue; + count++; + return bel; + } + } + + void run_iter() + { + setup_initial_state(); + n_accept = 0; + n_move = 0; + for (int m = 0; m < g.cfg.inner_iters; m++) { + for (auto cell : p.cells) { + if (cell->belStrength > STRENGTH_STRONG) + continue; + if (cell->cluster != ClusterId()) { + if (cell != ctx->getClusterRootCell(cell->cluster)) + continue; // only move cluster root + Loc old_loc = ctx->getBelLocation(cell->bel); + BelId new_root = random_bel_for_cell(cell, old_loc.z); + if (new_root == BelId() || new_root == cell->bel) + continue; + ++n_move; + if (chain_swap(cell, new_root)) + ++n_accept; + } else { + BelId new_bel = random_bel_for_cell(cell); + if (new_bel == BelId() || new_bel == cell->bel) + continue; + ++n_move; + if (single_cell_swap(cell, new_bel)) + ++n_accept; + } + } + } + } +}; + +struct ParallelRefine +{ + Context *ctx; + GlobalState g; + std::vector<ThreadState> t; + ParallelRefine(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), g(ctx, cfg) + { + g.flat_nets.reserve(ctx->nets.size()); + for (auto &net : ctx->nets) { + net.second->udata = g.flat_nets.size(); + g.flat_nets.push_back(net.second.get()); + } + // Setup per thread context + for (int i = 0; i < cfg.threads; i++) { + t.emplace_back(ctx, g, i); + } + // Setup region bounds + for (auto ®ion : ctx->region) { + Region *r = region.second.get(); + NetBB bb; + if (r->constr_bels) { + bb.x0 = std::numeric_limits<int>::max(); + bb.x1 = std::numeric_limits<int>::min(); + bb.y0 = std::numeric_limits<int>::max(); + bb.y1 = std::numeric_limits<int>::min(); + for (auto bel : r->bels) { + Loc loc = ctx->getBelLocation(bel); + bb.x0 = std::min(bb.x0, loc.x); + bb.x1 = std::max(bb.x1, loc.x); + bb.y0 = std::min(bb.y0, loc.y); + bb.y1 = std::max(bb.y1, loc.y); + } + } else { + bb.x0 = 0; + bb.y0 = 0; + bb.x1 = ctx->getGridDimX(); + bb.y1 = ctx->getGridDimY(); + } + g.region_bounds[r->name] = bb; + } + // Setup fast bels map + pool<IdString> cell_types_in_use; + for (auto &cell : ctx->cells) { + IdString cell_type = cell.second->type; + cell_types_in_use.insert(cell_type); + } + + for (auto cell_type : cell_types_in_use) { + g.bels.addCellType(cell_type); + } + }; + std::vector<Partition> parts; + void do_partition() + { + parts.clear(); + parts.emplace_back(ctx); + bool yaxis = false; + while (parts.size() < t.size()) { + std::vector<Partition> next(parts.size() * 2); + for (size_t i = 0; i < parts.size(); i++) { + // Randomly permute pivot every iteration so we get different thread boundaries + const float delta = 0.1; + float pivot = (0.5 - (delta / 2)) + (delta / 2) * (ctx->rng(10000) / 10000.0f); + parts.at(i).split(ctx, yaxis, pivot, next.at(i * 2), next.at(i * 2 + 1)); + } + std::swap(parts, next); + yaxis = !yaxis; + } + + NPNR_ASSERT(parts.size() == t.size()); + // TODO: thread pool to make this worthwhile... + std::vector<std::thread> workers; + for (size_t i = 0; i < t.size(); i++) { + workers.emplace_back([i, this]() { t.at(i).set_partition(parts.at(i)); }); + } + for (auto &w : workers) + w.join(); + } + + void update_global_costs() + { + g.last_bounds.resize(g.flat_nets.size()); + g.last_tmg_costs.resize(g.flat_nets.size()); + g.total_wirelen = 0; + g.total_timing_cost = 0; + for (size_t i = 0; i < g.flat_nets.size(); i++) { + NetInfo *ni = g.flat_nets.at(i); + if (g.skip_net(ni)) + continue; + g.last_bounds.at(i) = NetBB::compute(ctx, ni); + g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg); + if (!g.timing_skip_net(ni)) { + auto &tc = g.last_tmg_costs.at(i); + tc.resize(ni->users.capacity()); + for (auto usr : ni->users.enumerate()) { + tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index); + g.total_timing_cost += tc.at(usr.index.idx()); + } + } + } + } + void run() + { + + ScopeLock<Context> lock(ctx); + auto refine_start = std::chrono::high_resolution_clock::now(); + + g.tmg.setup_only = true; + g.tmg.setup(); + do_partition(); + log_info("Running parallel refinement with %d threads.\n", int(t.size())); + int iter = 1; + bool done = false; + update_global_costs(); + double avg_wirelen = g.total_wirelen; + wirelen_t min_wirelen = g.total_wirelen; + while (true) { + if (iter > 1) { + if (g.total_wirelen >= min_wirelen) { + done = true; + } else if (g.total_wirelen < min_wirelen) { + min_wirelen = g.total_wirelen; + } + int n_accept = 0, n_move = 0; + for (auto &t_data : t) { + n_accept += t_data.n_accept; + n_move += t_data.n_move; + } + double r_accept = n_accept / double(n_move); + if (g.total_wirelen < (0.95 * avg_wirelen) && g.total_wirelen > 0) { + avg_wirelen = 0.8 * avg_wirelen + 0.2 * g.total_wirelen; + } else { + if (r_accept > 0.15 && g.radius > 1) { + g.temperature *= 0.95; + } else { + g.temperature *= 0.8; + } + } + if ((iter % 10) == 0 && g.radius > 1) + --g.radius; + } + + if ((iter == 1) || ((iter % 5) == 0) || done) + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f\n", + iter, g.temperature, double(g.total_timing_cost), double(g.total_wirelen)); + + if (done) + break; + + do_partition(); + + std::vector<std::thread> workers; + workers.reserve(t.size()); + for (int j = 0; j < int(t.size()); j++) + workers.emplace_back([this, j]() { t.at(j).run_iter(); }); + for (auto &w : workers) + w.join(); + g.tmg.run(); + update_global_costs(); + iter++; + ctx->yield(); + } + auto refine_end = std::chrono::high_resolution_clock::now(); + log_info("Placement refine time %.02fs\n", std::chrono::duration<float>(refine_end - refine_start).count()); + } +}; +} // namespace + +ParallelRefineCfg::ParallelRefineCfg(Context *ctx) +{ + timing_driven = ctx->setting<bool>("timing_driven"); + threads = ctx->setting<int>("threads", 8); + // snap to nearest power of two; and minimum thread size + int actual_threads = 1; + while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size) + actual_threads *= 2; + threads = actual_threads; + hpwl_scale_x = 1; + hpwl_scale_y = 1; +} + +bool parallel_refine(Context *ctx, ParallelRefineCfg cfg) +{ + // TODO + ParallelRefine refine(ctx, cfg); + refine.run(); + timing_analysis(ctx); + return true; +} + +NEXTPNR_NAMESPACE_END diff --git a/common/parallel_refine.h b/common/parallel_refine.h new file mode 100644 index 00000000..556317cd --- /dev/null +++ b/common/parallel_refine.h @@ -0,0 +1,43 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 gatecat <gatecat@ds0.me> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef PARALLEL_REFINE_H +#define PARALLEL_REFINE_H + +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct ParallelRefineCfg +{ + ParallelRefineCfg(Context *ctx); + bool timing_driven; + int threads; + int hpwl_scale_x, hpwl_scale_y; + double lambda = 0.5f; + float crit_exp = 8; + int inner_iters = 15; + int min_thread_size = 500; +}; + +bool parallel_refine(Context *ctx, ParallelRefineCfg cfg); + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/placer1.cc b/common/placer1.cc index 6de035b4..a6ba3895 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -91,7 +91,7 @@ class SAPlacer decltype(NetInfo::udata) n = 0; for (auto &net : ctx->nets) { old_udata.emplace_back(net.second->udata); - net_arc_tcost.at(n).resize(net.second->users.size()); + net_arc_tcost.at(n).resize(net.second->users.capacity()); net.second->udata = n++; net_by_udata.push_back(net.second.get()); } @@ -118,7 +118,6 @@ class SAPlacer } region_bounds[r->name] = bb; } - build_port_index(); for (auto &cell : ctx->cells) { CellInfo *ci = cell.second.get(); if (ci->cluster == ClusterId()) @@ -858,7 +857,7 @@ class SAPlacer } // Get the timing cost for an arc of a net - inline double get_timing_cost(NetInfo *net, size_t user) + inline double get_timing_cost(NetInfo *net, const PortRef &user) { int cc; if (net->driver.cell == nullptr) @@ -866,11 +865,11 @@ class SAPlacer if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE) return 0; if (cfg.budgetBased) { - double delay = ctx->getDelayNS(ctx->predictArcDelay(net, net->users.at(user))); - return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10)); + double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user)); + return std::min(10.0, std::exp(delay - ctx->getDelayNS(user.budget) / 10)); } else { - float crit = tmg.get_criticality(CellPortKey(net->users.at(user))); - double delay = ctx->getDelayNS(ctx->predictArcDelay(net, net->users.at(user))); + float crit = tmg.get_criticality(CellPortKey(user)); + double delay = ctx->getDelayNS(ctx->predictArcDelay(net, user)); return delay * std::pow(crit, crit_exp); } } @@ -883,9 +882,9 @@ class SAPlacer if (ignore_net(ni)) continue; net_bounds[ni->udata] = get_net_bounds(ni); - if (cfg.timing_driven && int(ni->users.size()) < cfg.timingFanoutThresh) - for (size_t i = 0; i < ni->users.size(); i++) - net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); + if (cfg.timing_driven && int(ni->users.entries()) < cfg.timingFanoutThresh) + for (auto usr : ni->users.enumerate()) + net_arc_tcost[ni->udata][usr.index.idx()] = get_timing_cost(ni, usr.value); } } @@ -923,13 +922,13 @@ class SAPlacer }; std::vector<decltype(NetInfo::udata)> bounds_changed_nets_x, bounds_changed_nets_y; - std::vector<std::pair<decltype(NetInfo::udata), size_t>> changed_arcs; + std::vector<std::pair<decltype(NetInfo::udata), store_index<PortRef>>> changed_arcs; std::vector<BoundChangeType> already_bounds_changed_x, already_bounds_changed_y; std::vector<std::vector<bool>> already_changed_arcs; std::vector<BoundingBox> new_net_bounds; - std::vector<std::pair<std::pair<decltype(NetInfo::udata), size_t>, double>> new_arc_costs; + std::vector<std::pair<std::pair<decltype(NetInfo::udata), store_index<PortRef>>, double>> new_arc_costs; wirelen_t wirelen_delta = 0; double timing_delta = 0; @@ -940,7 +939,7 @@ class SAPlacer already_bounds_changed_y.resize(p->ctx->nets.size()); already_changed_arcs.resize(p->ctx->nets.size()); for (auto &net : p->ctx->nets) { - already_changed_arcs.at(net.second->udata).resize(net.second->users.size()); + already_changed_arcs.at(net.second->udata).resize(net.second->users.capacity()); } new_net_bounds = p->net_bounds; } @@ -956,7 +955,7 @@ class SAPlacer already_bounds_changed_y[bc] = NO_CHANGE; } for (const auto &tc : changed_arcs) - already_changed_arcs[tc.first][tc.second] = false; + already_changed_arcs[tc.first][tc.second.idx()] = false; bounds_changed_nets_x.clear(); bounds_changed_nets_y.clear(); changed_arcs.clear(); @@ -1100,22 +1099,22 @@ class SAPlacer } } - if (cfg.timing_driven && int(pn->users.size()) < cfg.timingFanoutThresh) { + if (cfg.timing_driven && int(pn->users.entries()) < cfg.timingFanoutThresh) { // Output ports - all arcs change timing if (port.second.type == PORT_OUT) { int cc; TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); if (cls != TMG_IGNORE) - for (size_t i = 0; i < pn->users.size(); i++) - if (!mc.already_changed_arcs[pn->udata][i]) { - mc.changed_arcs.emplace_back(std::make_pair(pn->udata, i)); - mc.already_changed_arcs[pn->udata][i] = true; + for (auto usr : pn->users.enumerate()) + if (!mc.already_changed_arcs[pn->udata][usr.index.idx()]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr.index)); + mc.already_changed_arcs[pn->udata][usr.index.idx()] = true; } } else if (port.second.type == PORT_IN) { - auto usr = fast_port_to_user.at(std::make_pair(cell->name, port.first)); - if (!mc.already_changed_arcs[pn->udata][usr]) { - mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr)); - mc.already_changed_arcs[pn->udata][usr] = true; + auto usr_idx = port.second.user_idx; + if (!mc.already_changed_arcs[pn->udata][usr_idx.idx()]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr_idx)); + mc.already_changed_arcs[pn->udata][usr_idx.idx()] = true; } } } @@ -1142,11 +1141,12 @@ class SAPlacer if (cfg.timing_driven) { for (const auto &tc : md.changed_arcs) { - double old_cost = net_arc_tcost.at(tc.first).at(tc.second); - double new_cost = get_timing_cost(net_by_udata.at(tc.first), tc.second); + double old_cost = net_arc_tcost.at(tc.first).at(tc.second.idx()); + double new_cost = + get_timing_cost(net_by_udata.at(tc.first), net_by_udata.at(tc.first)->users.at(tc.second)); md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); md.timing_delta += (new_cost - old_cost); - md.already_changed_arcs[tc.first][tc.second] = false; + md.already_changed_arcs[tc.first][tc.second.idx()] = false; } } } @@ -1158,21 +1158,10 @@ class SAPlacer for (const auto &bc : md.bounds_changed_nets_y) net_bounds[bc] = md.new_net_bounds[bc]; for (const auto &tc : md.new_arc_costs) - net_arc_tcost[tc.first.first].at(tc.first.second) = tc.second; + net_arc_tcost[tc.first.first].at(tc.first.second.idx()) = tc.second; curr_wirelen_cost += md.wirelen_delta; curr_timing_cost += md.timing_delta; } - // Build the cell port -> user index - void build_port_index() - { - for (auto &net : ctx->nets) { - NetInfo *ni = net.second.get(); - for (size_t i = 0; i < ni->users.size(); i++) { - auto &usr = ni->users.at(i); - fast_port_to_user[std::make_pair(usr.cell->name, usr.port)] = i; - } - } - } // Simple routeability driven placement const int large_cell_thresh = 50; @@ -1240,9 +1229,6 @@ class SAPlacer // Map net arcs to their timing cost (criticality * delay ns) std::vector<std::vector<double>> net_arc_tcost; - // Fast lookup for cell port to net user index - dict<std::pair<IdString, IdString>, size_t> fast_port_to_user; - // Fast lookup for cell to clusters dict<ClusterId, std::vector<CellInfo *>> cluster2cell; diff --git a/common/placer_heap.cc b/common/placer_heap.cc index f8385cef..5ffb7642 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -46,6 +46,7 @@ #include "fast_bels.h" #include "log.h" #include "nextpnr.h" +#include "parallel_refine.h" #include "place_common.h" #include "placer1.h" #include "scope_lock.h" @@ -346,8 +347,14 @@ class HeAPPlacer ctx->check(); lock.unlock_early(); - if (!placer1_refine(ctx, Placer1Cfg(ctx))) { - return false; + if (cfg.parallelRefine) { + if (!parallel_refine(ctx, ParallelRefineCfg(ctx))) { + return false; + } + } else { + if (!placer1_refine(ctx, Placer1Cfg(ctx))) { + return false; + } } return true; @@ -655,9 +662,9 @@ class HeAPPlacer template <typename Tf> void foreach_port(NetInfo *net, Tf func) { if (net->driver.cell != nullptr) - func(net->driver, -1); - for (size_t i = 0; i < net->users.size(); i++) - func(net->users.at(i), i); + func(net->driver, store_index<PortRef>()); + for (auto usr : net->users.enumerate()) + func(usr.value, usr.index); } // Build the system of equations for either X or Y @@ -682,7 +689,7 @@ class HeAPPlacer // Find the bounds of the net in this axis, and the ports that correspond to these bounds PortRef *lbport = nullptr, *ubport = nullptr; int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min(); - foreach_port(ni, [&](PortRef &port, int user_idx) { + foreach_port(ni, [&](PortRef &port, store_index<PortRef> user_idx) { int pos = cell_pos(port.cell); if (pos < lbpos) { lbpos = pos; @@ -713,17 +720,17 @@ class HeAPPlacer }; // Add all relevant connections to the matrix - foreach_port(ni, [&](PortRef &port, int user_idx) { + foreach_port(ni, [&](PortRef &port, store_index<PortRef> user_idx) { int this_pos = cell_pos(port.cell); auto process_arc = [&](PortRef *other) { if (other == &port) return; int o_pos = cell_pos(other->cell); - double weight = 1.0 / (ni->users.size() * + double weight = 1.0 / (ni->users.entries() * std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(o_pos - this_pos))); - if (user_idx != -1) { + if (user_idx) { weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)), cfg.criticalityExponent)); } @@ -1786,6 +1793,8 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx) beta = ctx->setting<float>("placerHeap/beta"); criticalityExponent = ctx->setting<int>("placerHeap/criticalityExponent"); timingWeight = ctx->setting<int>("placerHeap/timingWeight"); + parallelRefine = ctx->setting<bool>("placerHeap/parallelRefine", false); + timing_driven = ctx->setting<bool>("timing_driven"); solverTolerance = 1e-5; placeAllAtOnce = false; diff --git a/common/placer_heap.h b/common/placer_heap.h index bb40a126..9c62869e 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -41,6 +41,7 @@ struct PlacerHeapCfg bool timing_driven; float solverTolerance; bool placeAllAtOnce; + bool parallelRefine; int hpwl_scale_x, hpwl_scale_y; int spread_scale_x, spread_scale_y; diff --git a/common/pybindings.cc b/common/pybindings.cc index eef460ce..9a783eb4 100644 --- a/common/pybindings.cc +++ b/common/pybindings.cc @@ -220,7 +220,7 @@ PYBIND11_EMBEDDED_MODULE(MODULE_NAME, m) readwrite_wrapper<PortInfo &, decltype(&PortInfo::type), &PortInfo::type, pass_through<PortType>, pass_through<PortType>>::def_wrap(pi_cls, "type"); - typedef std::vector<PortRef> PortRefVector; + typedef indexed_store<PortRef> PortRefVector; typedef dict<WireId, PipMap> WireMap; typedef pool<BelId> BelSet; typedef pool<WireId> WireSet; @@ -288,7 +288,7 @@ PYBIND11_EMBEDDED_MODULE(MODULE_NAME, m) WRAP_MAP(m, WireMap, wrap_context<PipMap &>, "WireMap"); WRAP_MAP_UPTR(m, RegionMap, "RegionMap"); - WRAP_VECTOR(m, PortRefVector, wrap_context<PortRef>); + WRAP_INDEXSTORE(m, PortRefVector, wrap_context<PortRef>); typedef dict<IdString, ClockFmax> ClockFmaxMap; WRAP_MAP(m, ClockFmaxMap, pass_through<ClockFmax>, "ClockFmaxMap"); diff --git a/common/pycontainers.h b/common/pycontainers.h index a93230ab..ff49c34c 100644 --- a/common/pycontainers.h +++ b/common/pycontainers.h @@ -186,6 +186,63 @@ struct vector_wrapper #define WRAP_VECTOR(m, t, conv) vector_wrapper<t, py::return_value_policy::copy, conv>().wrap(m, #t, #t "Iterator") +template <typename T, py::return_value_policy P = py::return_value_policy::copy, + typename value_conv = PythonConversion::pass_through<T>> +struct indexed_store_wrapper +{ + typedef decltype(std::declval<T>().begin()) iterator_t; + typedef decltype(*(std::declval<iterator_t>())) value_t; + typedef typename PythonConversion::ContextualWrapper<T &> wrapped_vector; + typedef typename PythonConversion::ContextualWrapper<std::pair<iterator_t, iterator_t>> wrapped_pair; + using return_t = typename value_conv::ret_type; + static wrapped_pair iter(wrapped_vector &range) + { + return wrapped_pair(range.ctx, std::make_pair(range.base.begin(), range.base.end())); + } + + static std::string repr(wrapped_vector &range) + { + PythonConversion::string_converter<value_t> conv; + bool first = true; + std::stringstream ss; + ss << "["; + for (const auto &item : range.base) { + if (!first) + ss << ", "; + ss << "'" << conv.to_str(range.ctx, item) << "'"; + first = false; + } + ss << "]"; + return ss.str(); + } + + static int len(wrapped_vector &range) { return range.base.capacity(); } + + static py::object getitem(wrapped_vector &range, int i) + { + store_index<std::remove_reference_t<value_t>> idx(i); + if (!range.base.count(idx)) + throw py::none(); + return py::cast(value_conv()(range.ctx, boost::ref(range.base.at(idx)))); + } + + static void wrap(py::module &m, const char *range_name, const char *iter_name) + { + py::class_<wrapped_vector>(m, range_name) + .def("__iter__", iter) + .def("__repr__", repr) + .def("__len__", len) + .def("__getitem__", getitem); + + iterator_wrapper<iterator_t, P, value_conv>().wrap(m, iter_name); + } + + typedef iterator_wrapper<iterator_t, P, value_conv> iter_wrap; +}; + +#define WRAP_INDEXSTORE(m, t, conv) \ + indexed_store_wrapper<t, py::return_value_policy::copy, conv>().wrap(m, #t, #t "Iterator") + /* Wrapper for a pair, allows accessing either using C++-style members (.first and .second) or as a Python iterable and indexable object diff --git a/common/router1.cc b/common/router1.cc index f387aee1..98132116 100644 --- a/common/router1.cc +++ b/common/router1.cc @@ -34,7 +34,7 @@ struct arc_key { NetInfo *net_info; // logical user cell port index - int user_idx; + store_index<PortRef> user_idx; // physical index into cell->bel pin mapping (usually 0) unsigned phys_idx; @@ -52,7 +52,7 @@ struct arc_key unsigned int hash() const { std::size_t seed = std::hash<NetInfo *>()(net_info); - seed ^= std::hash<int>()(user_idx) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= user_idx.hash() + 0x9e3779b9 + (seed << 6) + (seed >> 2); seed ^= std::hash<int>()(phys_idx) + 0x9e3779b9 + (seed << 6) + (seed >> 2); return seed; } @@ -157,7 +157,7 @@ struct Router1 return; NetInfo *net_info = arc.net_info; - int user_idx = arc.user_idx; + auto user_idx = arc.user_idx; unsigned phys_idx = arc.phys_idx; auto src_wire = ctx->getNetinfoSourceWire(net_info); @@ -318,14 +318,14 @@ struct Router1 auto src_wire = ctx->getNetinfoSourceWire(net_info); log_assert(src_wire != WireId()); - for (int user_idx = 0; user_idx < int(net_info->users.size()); user_idx++) { + for (auto user : net_info->users.enumerate()) { unsigned phys_idx = 0; - for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, net_info->users[user_idx])) { + for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, user.value)) { log_assert(dst_wire != WireId()); arc_key arc; arc.net_info = net_info; - arc.user_idx = user_idx; + arc.user_idx = user.index; arc.phys_idx = phys_idx++; valid_arcs.insert(arc); #if 0 @@ -391,28 +391,29 @@ struct Router1 if (dst_to_arc.count(src_wire)) log_error("Wire %s is used as source and sink in different nets: %s vs %s (%d)\n", ctx->nameOfWire(src_wire), ctx->nameOf(net_info), - ctx->nameOf(dst_to_arc.at(src_wire).net_info), dst_to_arc.at(src_wire).user_idx); + ctx->nameOf(dst_to_arc.at(src_wire).net_info), dst_to_arc.at(src_wire).user_idx.idx()); - for (int user_idx = 0; user_idx < int(net_info->users.size()); user_idx++) { + for (auto user : net_info->users.enumerate()) { unsigned phys_idx = 0; - for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, net_info->users[user_idx])) { + for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, user.value)) { arc_key arc; arc.net_info = net_info; - arc.user_idx = user_idx; + arc.user_idx = user.index; arc.phys_idx = phys_idx++; if (dst_to_arc.count(dst_wire)) { if (dst_to_arc.at(dst_wire).net_info == net_info) continue; log_error("Found two arcs with same sink wire %s: %s (%d) vs %s (%d)\n", - ctx->nameOfWire(dst_wire), ctx->nameOf(net_info), user_idx, - ctx->nameOf(dst_to_arc.at(dst_wire).net_info), dst_to_arc.at(dst_wire).user_idx); + ctx->nameOfWire(dst_wire), ctx->nameOf(net_info), user.index.idx(), + ctx->nameOf(dst_to_arc.at(dst_wire).net_info), + dst_to_arc.at(dst_wire).user_idx.idx()); } if (src_to_net.count(dst_wire)) log_error("Wire %s is used as source and sink in different nets: %s vs %s (%d)\n", ctx->nameOfWire(dst_wire), ctx->nameOf(src_to_net.at(dst_wire)), - ctx->nameOf(net_info), user_idx); + ctx->nameOf(net_info), user.index.idx()); dst_to_arc[dst_wire] = arc; @@ -441,9 +442,8 @@ struct Router1 // TODO: this matches the situation before supporting multiple cell->bel pins, but do we want to keep // this invariant? if (phys_idx == 0) - log_warning("No wires found for port %s on destination cell %s.\n", - ctx->nameOf(net_info->users[user_idx].port), - ctx->nameOf(net_info->users[user_idx].cell)); + log_warning("No wires found for port %s on destination cell %s.\n", ctx->nameOf(user.value.port), + ctx->nameOf(user.value.cell)); } src_to_net[src_wire] = net_info; @@ -463,7 +463,7 @@ struct Router1 { NetInfo *net_info = arc.net_info; - int user_idx = arc.user_idx; + auto user_idx = arc.user_idx; auto src_wire = ctx->getNetinfoSourceWire(net_info); auto dst_wire = ctx->getNetinfoSinkWire(net_info, net_info->users[user_idx], arc.phys_idx); @@ -472,8 +472,8 @@ struct Router1 float crit = tmg.get_criticality(CellPortKey(net_info->users.at(user_idx))); if (ctx->debug) { - log("Routing arc %d on net %s (%d arcs total):\n", user_idx, ctx->nameOf(net_info), - int(net_info->users.size())); + log("Routing arc %d on net %s (%d arcs total):\n", user_idx.idx(), ctx->nameOf(net_info), + int(net_info->users.capacity())); log(" source ... %s\n", ctx->nameOfWire(src_wire)); log(" sink ..... %s\n", ctx->nameOfWire(dst_wire)); } @@ -805,8 +805,7 @@ struct Router1 NetInfo *ni = net.second.get(); if (skip_net(ni)) continue; - for (size_t i = 0; i < ni->users.size(); i++) { - auto &usr = ni->users.at(i); + for (auto &usr : ni->users) { ++arc_count; delay_t slack = tmg.get_setup_slack(CellPortKey(usr)); if (slack == std::numeric_limits<delay_t>::min()) @@ -825,8 +824,7 @@ struct Router1 NetInfo *ni = net.second.get(); if (skip_net(ni)) continue; - for (size_t i = 0; i < ni->users.size(); i++) { - auto &usr = ni->users.at(i); + for (auto &usr : ni->users) { delay_t slack = tmg.get_setup_slack(CellPortKey(usr)); if (slack == std::numeric_limits<delay_t>::min()) continue; @@ -912,7 +910,8 @@ bool router1(Context *ctx, const Router1Cfg &cfg) arc_key arc = router.arc_queue_pop(); if (!router.route_arc(arc, true)) { - log_warning("Failed to find a route for arc %d of net %s.\n", arc.user_idx, ctx->nameOf(arc.net_info)); + log_warning("Failed to find a route for arc %d of net %s.\n", arc.user_idx.idx(), + ctx->nameOf(arc.net_info)); #ifndef NDEBUG router.check(); ctx->check(); @@ -937,8 +936,7 @@ bool router1(Context *ctx, const Router1Cfg &cfg) } if (is_locked) continue; - for (size_t i = 0; i < ni->users.size(); i++) { - auto &usr = ni->users.at(i); + for (auto &usr : ni->users) { delay_t slack = router.tmg.get_setup_slack(CellPortKey(usr)); if (slack == std::numeric_limits<delay_t>::min()) continue; @@ -1051,15 +1049,15 @@ bool Context::checkRoutedDesign() const found_unrouted = true; } - dict<WireId, int> dest_wires; - for (int user_idx = 0; user_idx < int(net_info->users.size()); user_idx++) { - for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, net_info->users[user_idx])) { + dict<WireId, store_index<PortRef>> dest_wires; + for (auto user : net_info->users.enumerate()) { + for (auto dst_wire : ctx->getNetinfoSinkWires(net_info, user.value)) { log_assert(dst_wire != WireId()); - dest_wires[dst_wire] = user_idx; + dest_wires[dst_wire] = user.index; if (net_info->wires.count(dst_wire) == 0) { if (ctx->debug) - log(" sink %d (%s) not bound to net\n", user_idx, ctx->nameOfWire(dst_wire)); + log(" sink %d (%s) not bound to net\n", user.index.idx(), ctx->nameOfWire(dst_wire)); found_unrouted = true; } } @@ -1086,7 +1084,7 @@ bool Context::checkRoutedDesign() const if (db_entry.children.empty()) { if (dest_wires.count(w) != 0) { if (ctx->debug) - log(" %*s=> sink %d\n", 2 * num, "", dest_wires.at(w)); + log(" %*s=> sink %d\n", 2 * num, "", dest_wires.at(w).idx()); } else { if (ctx->debug) log(" %*s=> stub\n", 2 * num, ""); diff --git a/common/router2.cc b/common/router2.cc index c76e1f61..e943e493 100644 --- a/common/router2.cc +++ b/common/router2.cc @@ -117,7 +117,7 @@ struct Router2 NetInfo *ni = net.second.get(); ni->udata = i; nets_by_udata.at(i) = ni; - nets.at(i).arcs.resize(ni->users.size()); + nets.at(i).arcs.resize(ni->users.capacity()); // Start net bounding box at overall min/max nets.at(i).bb.x0 = std::numeric_limits<int>::max(); @@ -133,10 +133,9 @@ struct Router2 nets.at(i).cy += drv_loc.y; } - for (size_t j = 0; j < ni->users.size(); j++) { - auto &usr = ni->users.at(j); + for (auto usr : ni->users.enumerate()) { WireId src_wire = ctx->getNetinfoSourceWire(ni); - for (auto &dst_wire : ctx->getNetinfoSinkWires(ni, usr)) { + for (auto &dst_wire : ctx->getNetinfoSinkWires(ni, usr.value)) { nets.at(i).src_wire = src_wire; if (ni->driver.cell == nullptr) src_wire = dst_wire; @@ -146,10 +145,10 @@ struct Router2 log_error("No wire found for port %s on source cell %s.\n", ctx->nameOf(ni->driver.port), ctx->nameOf(ni->driver.cell)); if (dst_wire == WireId()) - log_error("No wire found for port %s on destination cell %s.\n", ctx->nameOf(usr.port), - ctx->nameOf(usr.cell)); - nets.at(i).arcs.at(j).emplace_back(); - auto &ad = nets.at(i).arcs.at(j).back(); + log_error("No wire found for port %s on destination cell %s.\n", ctx->nameOf(usr.value.port), + ctx->nameOf(usr.value.cell)); + nets.at(i).arcs.at(usr.index.idx()).emplace_back(); + auto &ad = nets.at(i).arcs.at(usr.index.idx()).back(); ad.sink_wire = dst_wire; // Set bounding box for this arc ad.bb = ctx->getRouteBoundingBox(src_wire, dst_wire); @@ -160,14 +159,14 @@ struct Router2 nets.at(i).bb.y1 = std::max(nets.at(i).bb.y1, ad.bb.y1); } // Add location to centroid sum - Loc usr_loc = ctx->getBelLocation(usr.cell->bel); + Loc usr_loc = ctx->getBelLocation(usr.value.cell->bel); nets.at(i).cx += usr_loc.x; nets.at(i).cy += usr_loc.y; } nets.at(i).hpwl = std::max( std::abs(nets.at(i).bb.y1 - nets.at(i).bb.y0) + std::abs(nets.at(i).bb.x1 - nets.at(i).bb.x0), 1); - nets.at(i).cx /= int(ni->users.size() + 1); - nets.at(i).cy /= int(ni->users.size() + 1); + nets.at(i).cx /= int(ni->users.entries() + 1); + nets.at(i).cy /= int(ni->users.entries() + 1); if (ctx->debug) log_info("%s: bb=(%d, %d)->(%d, %d) c=(%d, %d) hpwl=%d\n", ctx->nameOf(ni), nets.at(i).bb.x0, nets.at(i).bb.y0, nets.at(i).bb.x1, nets.at(i).bb.y1, nets.at(i).cx, nets.at(i).cy, @@ -218,11 +217,11 @@ struct Router2 for (auto &net_pair : ctx->nets) { auto *net = net_pair.second.get(); auto &nd = nets.at(net->udata); - for (size_t usr = 0; usr < net->users.size(); usr++) { - auto &ad = nd.arcs.at(usr); + for (auto usr : net->users.enumerate()) { + auto &ad = nd.arcs.at(usr.index.idx()); for (size_t phys_pin = 0; phys_pin < ad.size(); phys_pin++) { - if (check_arc_routing(net, usr, phys_pin)) { - record_prerouted_net(net, usr, phys_pin); + if (check_arc_routing(net, usr.index, phys_pin)) { + record_prerouted_net(net, usr.index, phys_pin); } } } @@ -261,7 +260,7 @@ struct Router2 // Nets that failed routing std::vector<NetInfo *> failed_nets; - std::vector<std::pair<size_t, size_t>> route_arcs; + std::vector<std::pair<store_index<PortRef>, size_t>> route_arcs; std::priority_queue<QueuedWire, std::vector<QueuedWire>, QueuedWire::Greater> fwd_queue, bwd_queue; // Special case where one net has multiple logical arcs to the same physical sink @@ -305,7 +304,7 @@ struct Router2 log(__VA_ARGS__); \ } while (0) - void bind_pip_internal(PerNetData &net, size_t user, int wire, PipId pip) + void bind_pip_internal(PerNetData &net, store_index<PortRef> user, int wire, PipId pip) { auto &wd = flat_wires.at(wire); auto found = net.wires.find(wd.w); @@ -323,7 +322,7 @@ struct Router2 } } - void unbind_pip_internal(PerNetData &net, size_t user, WireId wire) + void unbind_pip_internal(PerNetData &net, store_index<PortRef> user, WireId wire) { auto &wd = wire_data(wire); auto &b = net.wires.at(wd.w); @@ -335,10 +334,10 @@ struct Router2 } } - void ripup_arc(NetInfo *net, size_t user, size_t phys_pin) + void ripup_arc(NetInfo *net, store_index<PortRef> user, size_t phys_pin) { auto &nd = nets.at(net->udata); - auto &ad = nd.arcs.at(user).at(phys_pin); + auto &ad = nd.arcs.at(user.idx()).at(phys_pin); if (!ad.routed) return; WireId src = nets.at(net->udata).src_wire; @@ -351,7 +350,8 @@ struct Router2 ad.routed = false; } - float score_wire_for_arc(NetInfo *net, size_t user, size_t phys_pin, WireId wire, PipId pip, float crit_weight) + float score_wire_for_arc(NetInfo *net, store_index<PortRef> user, size_t phys_pin, WireId wire, PipId pip, + float crit_weight) { auto &wd = wire_data(wire); auto &nd = nets.at(net->udata); @@ -367,13 +367,14 @@ struct Router2 float present_cost = 1.0f + overuse * curr_cong_weight * crit_weight; if (pip != PipId()) { Loc pl = ctx->getPipLocation(pip); - bias_cost = cfg.bias_cost_factor * (base_cost / int(net->users.size())) * + bias_cost = cfg.bias_cost_factor * (base_cost / int(net->users.entries())) * ((std::abs(pl.x - nd.cx) + std::abs(pl.y - nd.cy)) / float(nd.hpwl)); } return base_cost * hist_cost * present_cost / (1 + (source_uses * crit_weight)) + bias_cost; } - float get_togo_cost(NetInfo *net, size_t user, int wire, WireId src_sink, float crit_weight, bool bwd = false) + float get_togo_cost(NetInfo *net, store_index<PortRef> user, int wire, WireId src_sink, float crit_weight, + bool bwd = false) { auto &nd = nets.at(net->udata); auto &wd = flat_wires[wire]; @@ -386,10 +387,10 @@ struct Router2 return (ctx->getDelayNS(est_delay) / (1 + source_uses * crit_weight)) + cfg.ipin_cost_adder; } - bool check_arc_routing(NetInfo *net, size_t usr, size_t phys_pin) + bool check_arc_routing(NetInfo *net, store_index<PortRef> usr, size_t phys_pin) { auto &nd = nets.at(net->udata); - auto &ad = nd.arcs.at(usr).at(phys_pin); + auto &ad = nd.arcs.at(usr.idx()).at(phys_pin); WireId src_wire = nets.at(net->udata).src_wire; WireId cursor = ad.sink_wire; while (nd.wires.count(cursor)) { @@ -404,10 +405,10 @@ struct Router2 return (cursor == src_wire); } - void record_prerouted_net(NetInfo *net, size_t usr, size_t phys_pin) + void record_prerouted_net(NetInfo *net, store_index<PortRef> usr, size_t phys_pin) { auto &nd = nets.at(net->udata); - auto &ad = nd.arcs.at(usr).at(phys_pin); + auto &ad = nd.arcs.at(usr.idx()).at(phys_pin); ad.routed = true; WireId src = nets.at(net->udata).src_wire; @@ -449,7 +450,7 @@ struct Router2 } // Find all the wires that must be used to route a given arc - bool reserve_wires_for_arc(NetInfo *net, size_t i) + bool reserve_wires_for_arc(NetInfo *net, store_index<PortRef> i) { bool did_something = false; WireId src = ctx->getNetinfoSourceWire(net); @@ -459,7 +460,7 @@ struct Router2 WireId cursor = sink; bool done = false; if (ctx->debug) - log("reserving wires for arc %d (%s.%s) of net %s\n", int(i), ctx->nameOf(usr.cell), + log("reserving wires for arc %d (%s.%s) of net %s\n", i.idx(), ctx->nameOf(usr.cell), ctx->nameOf(usr.port), ctx->nameOf(net)); while (!done) { auto &wd = wire_data(cursor); @@ -501,8 +502,8 @@ struct Router2 WireId src = ctx->getNetinfoSourceWire(net); if (src == WireId()) continue; - for (size_t i = 0; i < net->users.size(); i++) - did_something |= reserve_wires_for_arc(net, i); + for (auto usr : net->users.enumerate()) + did_something |= reserve_wires_for_arc(net, usr.index); } } while (did_something); } @@ -529,12 +530,12 @@ struct Router2 return false; } - void update_wire_by_loc(ThreadContext &t, NetInfo *net, size_t i, size_t phys_pin, bool is_mt) + void update_wire_by_loc(ThreadContext &t, NetInfo *net, store_index<PortRef> i, size_t phys_pin, bool is_mt) { if (is_pseudo_const_net(net)) return; auto &nd = nets.at(net->udata); - auto &ad = nd.arcs.at(i).at(phys_pin); + auto &ad = nd.arcs.at(i.idx()).at(phys_pin); WireId cursor = ad.sink_wire; if (!nd.wires.count(cursor)) return; @@ -571,28 +572,29 @@ struct Router2 bool was_visited_fwd(int wire) { return flat_wires.at(wire).visited_fwd; } bool was_visited_bwd(int wire) { return flat_wires.at(wire).visited_bwd; } - float get_arc_crit(NetInfo *net, size_t i) + float get_arc_crit(NetInfo *net, store_index<PortRef> i) { if (!timing_driven) return 0; return tmg.get_criticality(CellPortKey(net->users.at(i))); } - bool arc_failed_slack(NetInfo *net, size_t usr_idx) + bool arc_failed_slack(NetInfo *net, store_index<PortRef> usr_idx) { return timing_driven_ripup && (tmg.get_setup_slack(CellPortKey(net->users.at(usr_idx))) < (2 * ctx->getDelayEpsilon())); } - ArcRouteResult route_arc(ThreadContext &t, NetInfo *net, size_t i, size_t phys_pin, bool is_mt, bool is_bb = true) + ArcRouteResult route_arc(ThreadContext &t, NetInfo *net, store_index<PortRef> i, size_t phys_pin, bool is_mt, + bool is_bb = true) { // Do some initial lookups and checks auto arc_start = std::chrono::high_resolution_clock::now(); auto &nd = nets[net->udata]; - auto &ad = nd.arcs.at(i).at(phys_pin); + auto &ad = nd.arcs.at(i.idx()).at(phys_pin); auto &usr = net->users.at(i); - ROUTE_LOG_DBG("Routing arc %d of net '%s' (%d, %d) -> (%d, %d)\n", int(i), ctx->nameOf(net), ad.bb.x0, ad.bb.y0, - ad.bb.x1, ad.bb.y1); + ROUTE_LOG_DBG("Routing arc %d of net '%s' (%d, %d) -> (%d, %d)\n", i.idx(), ctx->nameOf(net), ad.bb.x0, + ad.bb.y0, ad.bb.x1, ad.bb.y1); WireId src_wire = ctx->getNetinfoSourceWire(net), dst_wire = ctx->getNetinfoSinkWire(net, usr, phys_pin); if (src_wire == WireId()) ARC_LOG_ERR("No wire found for port %s on source cell %s.\n", ctx->nameOf(net->driver.port), @@ -614,7 +616,7 @@ struct Router2 // 0. starting within a small range of existing routing // 1. expanding from all routing int mode = 0; - if (net->users.size() < 4 || nd.wires.empty() || (crit > 0.95)) + if (net->users.entries() < 4 || nd.wires.empty() || (crit > 0.95)) mode = 1; // This records the point where forwards and backwards routing met @@ -844,11 +846,11 @@ struct Router2 t.processed_sinks.insert(dst_wire); ad.routed = true; auto arc_end = std::chrono::high_resolution_clock::now(); - ROUTE_LOG_DBG("Routing arc %d of net '%s' (is_bb = %d) took %02fs\n", int(i), ctx->nameOf(net), is_bb, + ROUTE_LOG_DBG("Routing arc %d of net '%s' (is_bb = %d) took %02fs\n", i.idx(), ctx->nameOf(net), is_bb, std::chrono::duration<float>(arc_end - arc_start).count()); } else { auto arc_end = std::chrono::high_resolution_clock::now(); - ROUTE_LOG_DBG("Failed routing arc %d of net '%s' (is_bb = %d) took %02fs\n", int(i), ctx->nameOf(net), + ROUTE_LOG_DBG("Failed routing arc %d of net '%s' (is_bb = %d) took %02fs\n", i.idx(), ctx->nameOf(net), is_bb, std::chrono::duration<float>(arc_end - arc_start).count()); result = ARC_RETRY_WITHOUT_BB; } @@ -880,26 +882,26 @@ struct Router2 t.in_wire_by_loc.clear(); auto &nd = nets.at(net->udata); bool failed_slack = false; - for (size_t i = 0; i < net->users.size(); i++) - failed_slack |= arc_failed_slack(net, i); - for (size_t i = 0; i < net->users.size(); i++) { - auto &ad = nd.arcs.at(i); + for (auto usr : net->users.enumerate()) + failed_slack |= arc_failed_slack(net, usr.index); + for (auto usr : net->users.enumerate()) { + auto &ad = nd.arcs.at(usr.index.idx()); for (size_t j = 0; j < ad.size(); j++) { // Ripup failed arcs to start with // Check if arc is already legally routed - if (!failed_slack && check_arc_routing(net, i, j)) { - update_wire_by_loc(t, net, i, j, true); + if (!failed_slack && check_arc_routing(net, usr.index, j)) { + update_wire_by_loc(t, net, usr.index, j, true); continue; } // Ripup arc to start with - ripup_arc(net, i, j); - t.route_arcs.emplace_back(i, j); + ripup_arc(net, usr.index, j); + t.route_arcs.emplace_back(usr.index, j); } } // Route most critical arc first std::stable_sort(t.route_arcs.begin(), t.route_arcs.end(), - [&](std::pair<size_t, size_t> a, std::pair<size_t, size_t> b) { + [&](std::pair<store_index<PortRef>, size_t> a, std::pair<store_index<PortRef>, size_t> b) { return get_arc_crit(net, a.first) > get_arc_crit(net, b.first); }); for (auto a : t.route_arcs) { @@ -913,7 +915,7 @@ struct Router2 } else { // Attempt a re-route without the bounding box constraint ROUTE_LOG_DBG("Rerouting arc %d.%d of net '%s' without bounding box, possible tricky routing...\n", - int(a.first), int(a.second), ctx->nameOf(net)); + a.first.idx(), int(a.second), ctx->nameOf(net)); auto res2 = route_arc(t, net, a.first, a.second, is_mt, false); // If this also fails, no choice but to give up if (res2 != ARC_SUCCESS) { @@ -926,7 +928,7 @@ struct Router2 log("\n"); } } - log_error("Failed to route arc %d.%d of net '%s', from %s to %s.\n", int(a.first), + log_error("Failed to route arc %d.%d of net '%s', from %s to %s.\n", a.first.idx(), int(a.second), ctx->nameOf(net), ctx->nameOfWire(ctx->getNetinfoSourceWire(net)), ctx->nameOfWire(ctx->getNetinfoSinkWire(net, net->users.at(a.first), a.second))); } @@ -991,7 +993,7 @@ struct Router2 } } - bool bind_and_check(NetInfo *net, int usr_idx, int phys_pin) + bool bind_and_check(NetInfo *net, store_index<PortRef> usr_idx, int phys_pin) { #ifdef ARCH_ECP5 if (net->is_global) @@ -999,7 +1001,7 @@ struct Router2 #endif bool success = true; auto &nd = nets.at(net->udata); - auto &ad = nd.arcs.at(usr_idx).at(phys_pin); + auto &ad = nd.arcs.at(usr_idx.idx()).at(phys_pin); auto &usr = net->users.at(usr_idx); WireId src = ctx->getNetinfoSourceWire(net); // Skip routes with no source @@ -1043,7 +1045,8 @@ struct Router2 if (!nd.wires.count(cursor)) { log("Failure details:\n"); log(" Cursor: %s\n", ctx->nameOfWire(cursor)); - log_error("Internal error; incomplete route tree for arc %d of net %s.\n", usr_idx, ctx->nameOf(net)); + log_error("Internal error; incomplete route tree for arc %d of net %s.\n", usr_idx.idx(), + ctx->nameOf(net)); } PipId p = nd.wires.at(cursor).first; if (ctx->checkPipAvailForNet(p, net)) { @@ -1104,9 +1107,9 @@ struct Router2 } // Bind the arcs using the routes we have discovered - for (size_t i = 0; i < net->users.size(); i++) { - for (size_t phys_pin = 0; phys_pin < nets.at(net->udata).arcs.at(i).size(); phys_pin++) { - if (!bind_and_check(net, i, phys_pin)) { + for (auto usr : net->users.enumerate()) { + for (size_t phys_pin = 0; phys_pin < nets.at(net->udata).arcs.at(usr.index.idx()).size(); phys_pin++) { + if (!bind_and_check(net, usr.index, phys_pin)) { ++arch_fail; success = false; } @@ -1313,10 +1316,10 @@ struct Router2 route_net(tcs.at(N), fail, false); } - delay_t get_route_delay(int net, int usr_idx, int phys_idx) + delay_t get_route_delay(int net, store_index<PortRef> usr_idx, int phys_idx) { auto &nd = nets.at(net); - auto &ad = nd.arcs.at(usr_idx).at(phys_idx); + auto &ad = nd.arcs.at(usr_idx.idx()).at(phys_idx); WireId cursor = ad.sink_wire; if (cursor == WireId() || nd.src_wire == WireId()) return 0; @@ -1344,11 +1347,11 @@ struct Router2 continue; #endif auto &nd = nets.at(net); - for (int i = 0; i < int(nd.arcs.size()); i++) { + for (auto usr : ni->users.enumerate()) { delay_t arc_delay = 0; - for (int j = 0; j < int(nd.arcs.at(i).size()); j++) - arc_delay = std::max(arc_delay, get_route_delay(net, i, j)); - tmg.set_route_delay(CellPortKey(ni->users.at(i)), DelayPair(arc_delay)); + for (int j = 0; j < int(nd.arcs.at(usr.index.idx()).size()); j++) + arc_delay = std::max(arc_delay, get_route_delay(net, usr.index, j)); + tmg.set_route_delay(CellPortKey(usr.value), DelayPair(arc_delay)); } } } @@ -1416,8 +1419,8 @@ struct Router2 if (timing_driven_ripup && iter < 500) { for (size_t i = 0; i < nets_by_udata.size(); i++) { NetInfo *ni = nets_by_udata.at(i); - for (size_t j = 0; j < ni->users.size(); j++) { - if (arc_failed_slack(ni, j)) { + for (auto usr : ni->users.enumerate()) { + if (arc_failed_slack(ni, usr.index)) { failed_nets.insert(i); ++tmgfail; } @@ -1451,7 +1454,7 @@ struct Router2 log_info("1000 slowest nets by runtime:\n"); for (int i = 0; i < std::min(int(nets_by_runtime.size()), 1000); i++) { log(" %80s %6d %.1fms\n", nets_by_runtime.at(i).second.c_str(ctx), - int(ctx->nets.at(nets_by_runtime.at(i).second)->users.size()), + int(ctx->nets.at(nets_by_runtime.at(i).second)->users.entries()), nets_by_runtime.at(i).first / 1000.0); } } diff --git a/common/timing.cc b/common/timing.cc index f30d4fc5..834785fb 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -60,14 +60,6 @@ void TimingAnalyser::init_ports() data.cell_port = CellPortKey(ci->name, port.first); } } - // Cell port to net port mapping - for (auto &net : ctx->nets) { - NetInfo *ni = net.second.get(); - if (ni->driver.cell != nullptr) - ports[CellPortKey(ni->driver)].net_port = NetPortKey(ni->name); - for (size_t i = 0; i < ni->users.size(); i++) - ports[CellPortKey(ni->users.at(i))].net_port = NetPortKey(ni->name, i); - } } void TimingAnalyser::get_cell_delays() @@ -79,7 +71,7 @@ void TimingAnalyser::get_cell_delays() IdString name = port.first.port; // Ignore dangling ports altogether for timing purposes - if (pd.net_port.net == IdString()) + if (!pi.net) continue; pd.cell_arcs.clear(); int clkInfoCount = 0; diff --git a/common/timing.h b/common/timing.h index b34fd636..fe1bcaa8 100644 --- a/common/timing.h +++ b/common/timing.h @@ -44,28 +44,6 @@ struct CellPortKey } }; -struct NetPortKey -{ - IdString net; - size_t idx; - NetPortKey(){}; - explicit NetPortKey(IdString net) : net(net), idx(DRIVER_IDX){}; // driver - explicit NetPortKey(IdString net, size_t user) : net(net), idx(user){}; // user - - static const size_t DRIVER_IDX = std::numeric_limits<size_t>::max(); - - inline bool is_driver() const { return (idx == DRIVER_IDX); } - inline size_t user_idx() const - { - NPNR_ASSERT(idx != DRIVER_IDX); - return idx; - } - - unsigned int hash() const { return mkhash(net.hash(), idx); } - - inline bool operator==(const NetPortKey &other) const { return (net == other.net) && (idx == other.idx); } -}; - struct ClockDomainKey { IdString clock; @@ -194,7 +172,6 @@ struct TimingAnalyser struct PerPort { CellPortKey cell_port; - NetPortKey net_port; PortType type; // per domain timings dict<domain_id_t, ArrivReqTime> arrival; diff --git a/common/timing_opt.cc b/common/timing_opt.cc index a73a70cf..f9246292 100644 --- a/common/timing_opt.cc +++ b/common/timing_opt.cc @@ -73,8 +73,7 @@ class TimingOptimiser for (auto usr : ni->users) { max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max(); } - for (size_t i = 0; i < ni->users.size(); i++) { - auto &usr = ni->users.at(i); + for (auto usr : ni->users) { delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); delay_t slack = tmg.get_setup_slack(CellPortKey(usr)); delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr)); @@ -234,7 +233,7 @@ class TimingOptimiser std::vector<std::vector<PortRef *>> find_crit_paths(float crit_thresh, size_t max_count) { std::vector<std::vector<PortRef *>> crit_paths; - std::vector<std::pair<NetInfo *, int>> crit_nets; + std::vector<std::pair<NetInfo *, store_index<PortRef>>> crit_nets; std::vector<IdString> netnames; std::transform(ctx->nets.begin(), ctx->nets.end(), std::back_inserter(netnames), [](const std::pair<IdString, std::unique_ptr<NetInfo>> &kv) { return kv.first; }); @@ -243,28 +242,19 @@ class TimingOptimiser if (crit_nets.size() >= max_count) break; float highest_crit = 0; - size_t crit_user_idx = 0; + store_index<PortRef> crit_user_idx{}; NetInfo *ni = ctx->nets.at(net).get(); - for (size_t i = 0; i < ni->users.size(); i++) { - float crit = tmg.get_criticality(CellPortKey(ni->users.at(i))); + for (auto usr : ni->users.enumerate()) { + float crit = tmg.get_criticality(CellPortKey(usr.value)); if (crit > highest_crit) { highest_crit = crit; - crit_user_idx = i; + crit_user_idx = usr.index; } } if (highest_crit > crit_thresh) - crit_nets.push_back(std::make_pair(ni, crit_user_idx)); + crit_nets.emplace_back(ni, crit_user_idx); } - auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { - NPNR_ASSERT(port.net != nullptr); - for (size_t i = 0; i < port.net->users.size(); i++) { - auto &usr = port.net->users.at(i); - if (usr.cell == cell && usr.port == port.name) - return i; - } - NPNR_ASSERT_FALSE("port user not found on net"); - }; pool<PortRef *, hash_ptr_ops> used_ports; for (auto crit_net : crit_nets) { @@ -280,7 +270,7 @@ class TimingOptimiser NetInfo *back_cursor = crit_net.first; while (back_cursor != nullptr) { float max_crit = 0; - std::pair<NetInfo *, size_t> crit_sink{nullptr, 0}; + std::pair<NetInfo *, store_index<PortRef>> crit_sink{nullptr, {}}; CellInfo *cell = back_cursor->driver.cell; if (cell == nullptr) break; @@ -298,13 +288,12 @@ class TimingOptimiser bool is_path = ctx->getCellDelay(cell, port.first, back_cursor->driver.port, combDelay); if (!is_path) continue; - size_t user_idx = port_user_index(cell, port.second); float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first)); - if (used_ports.count(&(pn->users.at(user_idx)))) + if (used_ports.count(&(pn->users.at(port.second.user_idx)))) continue; if (usr_crit >= max_crit) { max_crit = usr_crit; - crit_sink = std::make_pair(pn, user_idx); + crit_sink = std::make_pair(pn, port.second.user_idx); } } @@ -319,7 +308,7 @@ class TimingOptimiser while (fwd_cursor != nullptr) { crit_path.push_back(fwd_cursor); float max_crit = 0; - std::pair<NetInfo *, size_t> crit_sink{nullptr, 0}; + std::pair<NetInfo *, store_index<PortRef>> crit_sink{nullptr, {}}; CellInfo *cell = fwd_cursor->cell; for (auto port : cell->ports) { if (port.second.type != PORT_OUT) @@ -336,13 +325,13 @@ class TimingOptimiser bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); if (!is_path) continue; - for (size_t i = 0; i < pn->users.size(); i++) { - if (used_ports.count(&(pn->users.at(i)))) + for (auto usr : pn->users.enumerate()) { + if (used_ports.count(&(pn->users.at(usr.index)))) continue; - float crit = tmg.get_criticality(CellPortKey(pn->users.at(i))); + float crit = tmg.get_criticality(CellPortKey(usr.value)); if (crit >= max_crit) { max_crit = crit; - crit_sink = std::make_pair(pn, i); + crit_sink = std::make_pair(pn, usr.index); } } } @@ -409,14 +398,10 @@ class TimingOptimiser delay_t original_delay = 0; for (size_t i = 0; i < path.size(); i++) { - NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; - for (size_t j = 0; j < pn->users.size(); j++) { - auto &usr = pn->users.at(j); - if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { - original_delay += ctx->predictArcDelay(pn, usr); - break; - } - } + auto &port = path.at(i)->cell->ports.at(path.at(i)->port); + NetInfo *pn = port.net; + if (port.user_idx) + original_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx)); } IdString last_cell; @@ -493,14 +478,10 @@ class TimingOptimiser delay_t total_delay = 0; for (size_t i = 0; i < path.size(); i++) { - NetInfo *pn = path.at(i)->cell->ports.at(path.at(i)->port).net; - for (size_t j = 0; j < pn->users.size(); j++) { - auto &usr = pn->users.at(j); - if (usr.cell == path.at(i)->cell && usr.port == path.at(i)->port) { - total_delay += ctx->predictArcDelay(pn, usr); - break; - } - } + auto &port = path.at(i)->cell->ports.at(path.at(i)->port); + NetInfo *pn = port.net; + if (port.user_idx) + total_delay += ctx->predictArcDelay(pn, pn->users.at(port.user_idx)); if (path.at(i)->cell == next_cell) break; } diff --git a/ecp5/cells.cc b/ecp5/cells.cc index a5d484ff..2c5f96d3 100644 --- a/ecp5/cells.cc +++ b/ecp5/cells.cc @@ -212,10 +212,7 @@ static void replace_port_safe(bool has_ff, CellInfo *ff, IdString ff_port, CellI NPNR_ASSERT(lc->ports.at(lc_port).net == ff->ports.at(ff_port).net); NetInfo *ffnet = ff->ports.at(ff_port).net; if (ffnet != nullptr) - ffnet->users.erase( - std::remove_if(ffnet->users.begin(), ffnet->users.end(), - [ff, ff_port](PortRef port) { return port.cell == ff && port.port == ff_port; }), - ffnet->users.end()); + ffnet->users.remove(ff->ports.at(ff_port).user_idx); } else { ff->movePortTo(ff_port, lc, lc_port); } @@ -477,7 +474,7 @@ void nxio_to_tr(Context *ctx, CellInfo *nxio, CellInfo *trio, std::vector<std::u inv_lut->connectPorts(id_Z, trio, id_T); created_cells.push_back(std::move(inv_lut)); - if (donet->users.size() > 1) { + if (donet->users.entries() > 1) { for (auto user : donet->users) log_info(" remaining tristate user: %s.%s\n", user.cell->name.c_str(ctx), user.port.c_str(ctx)); log_error("unsupported tristate IO pattern for IO buffer '%s', " diff --git a/ecp5/globals.cc b/ecp5/globals.cc index 7b48e693..71188aa0 100644 --- a/ecp5/globals.cc +++ b/ecp5/globals.cc @@ -472,17 +472,15 @@ class Ecp5GlobalRouter } else if (is_logic_port(user)) { keep_users.push_back(user); } else { - glbptr->users.push_back(user); user.cell->ports.at(user.port).net = glbptr; + user.cell->ports.at(user.port).user_idx = glbptr->users.add(user); } } - net->users = keep_users; + net->users.clear(); + for (auto &usr : keep_users) + usr.cell->ports.at(usr.port).user_idx = net->users.add(usr); - dcc->ports[id_CLKI].net = net; - PortRef clki_pr; - clki_pr.port = id_CLKI; - clki_pr.cell = dcc.get(); - net->users.push_back(clki_pr); + dcc->connectPort(id_CLKI, net); if (net->clkconstr) { glbptr->clkconstr = std::unique_ptr<ClockConstraint>(new ClockConstraint()); glbptr->clkconstr->low = net->clkconstr->low; @@ -556,9 +554,13 @@ class Ecp5GlobalRouter if (ci->type == id_DCCA || ci->type == id_DCSC) { NetInfo *clock = ci->ports.at((ci->type == id_DCSC) ? id_DCSOUT : id_CLKO).net; NPNR_ASSERT(clock != nullptr); - bool drives_fabric = std::any_of(clock->users.begin(), clock->users.end(), - [this](const PortRef &port) { return !is_clock_port(port); }); - + bool drives_fabric = false; + for (auto &usr : clock->users) { + if (!is_clock_port(usr)) { + drives_fabric = true; + break; + } + } int glbid; if (drives_fabric) { if (fab_globals.empty()) diff --git a/ecp5/pack.cc b/ecp5/pack.cc index 2b069db0..02ce5aa1 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -326,14 +326,14 @@ class Ecp5Packer } // Pack LUTs feeding the same CCU2, RAM or DFF into a SLICE - if (znet != nullptr && znet->users.size() < 10) { + if (znet != nullptr && znet->users.entries() < 10) { for (auto user : znet->users) { if (is_lc(ctx, user.cell) || user.cell->type == id_DP16KD || is_ff(ctx, user.cell)) { for (auto port : user.cell->ports) { if (port.second.type != PORT_IN || port.second.net == nullptr || port.second.net == znet) continue; - if (port.second.net->users.size() > 10) + if (port.second.net->users.entries() > 10) continue; CellInfo *drv = port.second.net->driver.cell; if (drv == nullptr) @@ -355,11 +355,11 @@ class Ecp5Packer if (!ci->ports.count(ctx->id(inp))) continue; NetInfo *innet = ci->ports.at(ctx->id(inp)).net; - if (innet != nullptr && innet->users.size() < 5 && innet->users.size() > 1) + if (innet != nullptr && innet->users.entries() < 5 && innet->users.entries() > 1) inpnets.push_back(innet); } std::sort(inpnets.begin(), inpnets.end(), - [&](const NetInfo *a, const NetInfo *b) { return a->users.size() < b->users.size(); }); + [&](const NetInfo *a, const NetInfo *b) { return a->users.entries() < b->users.entries(); }); for (auto inet : inpnets) { for (auto &user : inet->users) { if (user.cell == nullptr || user.cell == ci || !is_lut(ctx, user.cell)) @@ -412,7 +412,7 @@ class Ecp5Packer return false; for (auto user : net->users) { if (is_top_port(user)) { - if (net->users.size() > 1) + if (net->users.entries() > 1) log_error(" port %s.%s must be connected to (and only to) a top level pin\n", user.cell->name.c_str(ctx), user.port.c_str(ctx)); tp = user; @@ -420,7 +420,7 @@ class Ecp5Packer } } if (net->driver.cell != nullptr && is_top_port(net->driver)) { - if (net->users.size() > 1) + if (net->users.entries() > 1) log_error(" port %s.%s must be connected to (and only to) a top level pin\n", net->driver.cell->name.c_str(ctx), net->driver.port.c_str(ctx)); tp = net->driver; @@ -460,9 +460,9 @@ class Ecp5Packer NetInfo *net = trio->ports.at(id_B).net; if (((ci->type == ctx->id("$nextpnr_ibuf") || ci->type == ctx->id("$nextpnr_iobuf")) && - net->users.size() > 1) || + net->users.entries() > 1) || (ci->type == ctx->id("$nextpnr_obuf") && - (net->users.size() > 2 || net->driver.cell != nullptr)) || + (net->users.entries() > 2 || net->driver.cell != nullptr)) || (ci->type == ctx->id("$nextpnr_iobuf") && ci->ports.at(id_I).net != nullptr && ci->ports.at(id_I).net->driver.cell != nullptr)) log_error("Pin B of %s '%s' connected to more than a single top level IO.\n", @@ -742,16 +742,14 @@ class Ecp5Packer feedin->params[id_INJECT1_0] = std::string("NO"); feedin->params[id_INJECT1_1] = std::string("YES"); - carry->users.erase(std::remove_if(carry->users.begin(), carry->users.end(), - [chain_in](const PortRef &user) { - return user.port == chain_in.port && user.cell == chain_in.cell; - }), - carry->users.end()); + carry->users.remove(chain_in.cell->ports.at(chain_in.port).user_idx); feedin->connectPort(id_A0, carry); NetInfo *new_carry = ctx->createNet(ctx->id(feedin->name.str(ctx) + "$COUT")); feedin->connectPort(id_COUT, new_carry); chain_in.cell->ports[chain_in.port].net = nullptr; + chain_in.cell->ports[chain_in.port].user_idx = {}; + chain_in.cell->connectPort(chain_in.port, new_carry); CellInfo *feedin_ptr = feedin.get(); @@ -782,12 +780,8 @@ class Ecp5Packer if (chain_next) { // Loop back into LUT4_1 for feedthrough feedout->connectPort(id_A1, carry); - - carry->users.erase(std::remove_if(carry->users.begin(), carry->users.end(), - [chain_next](const PortRef &user) { - return user.port == chain_next->port && user.cell == chain_next->cell; - }), - carry->users.end()); + if (chain_next->cell && chain_next->cell->ports.at(chain_next->port).user_idx) + carry->users.remove(chain_next->cell->ports.at(chain_next->port).user_idx); NetInfo *new_cout = ctx->createNet(ctx->id(feedout->name.str(ctx) + "$COUT")); feedout->connectPort(id_COUT, new_cout); @@ -833,7 +827,7 @@ class Ecp5Packer } else { NetInfo *carry_net = cell->ports.at(id_COUT).net; bool at_end = (curr_cell == carryc.cells.end() - 1); - if (carry_net != nullptr && (carry_net->users.size() > 1 || at_end)) { + if (carry_net != nullptr && (carry_net->users.entries() > 1 || at_end)) { boost::optional<PortRef> nextport; if (!at_end) { auto next_cell = *(curr_cell + 1); @@ -1123,7 +1117,7 @@ class Ecp5Packer if (pn == nullptr) continue; // Skip high-fanout nets that are unlikely to be relevant - if (pn->users.size() > 25) + if (pn->users.entries() > 25) continue; // Add other ports on this net if not already visited auto visit_port = [&](const PortRef &port) { @@ -1304,11 +1298,11 @@ class Ecp5Packer } else { // Not allowed to change to a tie-high uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } else { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } else if (is_ff(ctx, uc) && user.port == id_LSR && ((!constval && str_or_default(uc->params, id_LSRMUX, "LSR") == "LSR") || @@ -1335,7 +1329,7 @@ class Ecp5Packer user.port.str(ctx).substr(0, 6) == "SOURCE" || user.port.str(ctx).substr(0, 6) == "SIGNED" || user.port.str(ctx).substr(0, 2) == "OP") { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } else { // Connected to CIB ABCD. Default state is bitstream configurable uc->params[ctx->id(user.port.str(ctx) + "MUX")] = std::string(constval ? "1" : "0"); @@ -1343,7 +1337,7 @@ class Ecp5Packer } } else { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } } @@ -1454,9 +1448,9 @@ class Ecp5Packer rename_param(ci, "CLKWMUX", "CLKAMUX"); if (str_or_default(ci->params, id_CLKAMUX) == "CLKW") ci->params[id_CLKAMUX] = std::string("CLKA"); + rename_param(ci, "CLKRMUX", "CLKBMUX"); if (str_or_default(ci->params, id_CLKBMUX) == "CLKR") ci->params[id_CLKBMUX] = std::string("CLKB"); - rename_param(ci, "CLKRMUX", "CLKRMUX"); rename_param(ci, "CSDECODE_W", "CSDECODE_A"); rename_param(ci, "CSDECODE_R", "CSDECODE_B"); std::string outreg = str_or_default(ci->params, id_REGMODE, "NOREG"); @@ -2037,7 +2031,7 @@ class Ecp5Packer CellInfo *ci = cell.second.get(); if (ci->type == id_DQSBUFM) { CellInfo *pio = net_driven_by(ctx, ci->ports.at(id_DQSI).net, is_trellis_io, id_O); - if (pio == nullptr || ci->ports.at(id_DQSI).net->users.size() > 1) + if (pio == nullptr || ci->ports.at(id_DQSI).net->users.entries() > 1) log_error("DQSBUFM '%s' DQSI input must be connected only to a top level input\n", ci->name.c_str(ctx)); if (!pio->attrs.count(id_BEL)) @@ -2273,7 +2267,7 @@ class Ecp5Packer CellInfo *i_pio = net_driven_by(ctx, ci->ports.at(id_A).net, is_trellis_io, id_O); CellInfo *o_pio = net_only_drives(ctx, ci->ports.at(id_Z).net, is_trellis_io, id_I, true); CellInfo *iol = nullptr; - if (i_pio != nullptr && ci->ports.at(id_A).net->users.size() == 1) { + if (i_pio != nullptr && ci->ports.at(id_A).net->users.entries() == 1) { iol = create_pio_iologic(i_pio, ci); set_iologic_mode(iol, "IREG_OREG"); bool drives_iologic = false; @@ -2356,7 +2350,7 @@ class Ecp5Packer CellInfo *ci = cell.second.get(); if (ci->type == id_IDDRX1F) { CellInfo *pio = net_driven_by(ctx, ci->ports.at(id_D).net, is_trellis_io, id_O); - if (pio == nullptr || ci->ports.at(id_D).net->users.size() > 1) + if (pio == nullptr || ci->ports.at(id_D).net->users.entries() > 1) log_error("IDDRX1F '%s' D input must be connected only to a top level input\n", ci->name.c_str(ctx)); CellInfo *iol; @@ -2438,7 +2432,7 @@ class Ecp5Packer packed_cells.insert(cell.first); } else if (ci->type == id_IDDRX2F || ci->type == id_IDDR71B) { CellInfo *pio = net_driven_by(ctx, ci->ports.at(id_D).net, is_trellis_io, id_O); - if (pio == nullptr || ci->ports.at(id_D).net->users.size() > 1) + if (pio == nullptr || ci->ports.at(id_D).net->users.entries() > 1) log_error("%s '%s' D input must be connected only to a top level input\n", ci->type.c_str(ctx), ci->name.c_str(ctx)); CellInfo *iol; @@ -2530,7 +2524,7 @@ class Ecp5Packer packed_cells.insert(cell.first); } else if (ci->type == id_IDDRX2DQA) { CellInfo *pio = net_driven_by(ctx, ci->ports.at(id_D).net, is_trellis_io, id_O); - if (pio == nullptr || ci->ports.at(id_D).net->users.size() > 1) + if (pio == nullptr || ci->ports.at(id_D).net->users.entries() > 1) log_error("IDDRX2DQA '%s' D input must be connected only to a top level input\n", ci->name.c_str(ctx)); CellInfo *iol; @@ -2597,7 +2591,7 @@ class Ecp5Packer // See if it can be packed as an input ff NetInfo *d = ci->getPort(id_DI); CellInfo *pio = net_driven_by(ctx, d, is_trellis_io, id_O); - if (pio != nullptr && d->users.size() == 1) { + if (pio != nullptr && d->users.entries() == 1) { // Input FF CellInfo *iol; if (pio_iologic.count(pio->name)) diff --git a/fpga_interchange/arch.cc b/fpga_interchange/arch.cc index 917af85e..a5e802d3 100644 --- a/fpga_interchange/arch.cc +++ b/fpga_interchange/arch.cc @@ -1377,7 +1377,7 @@ void Arch::merge_constant_nets() } NPNR_ASSERT(net->driver.port == gnd_cell_port); - std::vector<PortRef> users_copy = net->users; + indexed_store<PortRef> users_copy = net->users; for (const PortRef &port_ref : users_copy) { IdString cell = port_ref.cell->name; disconnectPort(cell, port_ref.port); @@ -1400,7 +1400,7 @@ void Arch::merge_constant_nets() } NPNR_ASSERT(net->driver.port == vcc_cell_port); - std::vector<PortRef> users_copy = net->users; + indexed_store<PortRef> users_copy = net->users; for (const PortRef &port_ref : users_copy) { IdString cell = port_ref.cell->name; disconnectPort(cell, port_ref.port); diff --git a/fpga_interchange/arch_pack_clusters.cc b/fpga_interchange/arch_pack_clusters.cc index 31e0522b..97a3e1a5 100644 --- a/fpga_interchange/arch_pack_clusters.cc +++ b/fpga_interchange/arch_pack_clusters.cc @@ -945,10 +945,10 @@ void Arch::prepare_cluster(const ClusterPOD *cluster, uint32_t index) if (port_info.type == PORT_OUT) { exclude_nets.insert(port_info.net->name); auto &users = port_info.net->users; - if (users.size() != 1) + if (users.entries() != 1) continue; - CellInfo *user_cell = users[0].cell; + CellInfo *user_cell = (*users.begin()).cell; if (user_cell == nullptr) continue; @@ -978,7 +978,7 @@ void Arch::prepare_cluster(const ClusterPOD *cluster, uint32_t index) } else if (port_info.type == PORT_IN) { auto &driver = port_info.net->driver; auto &users = port_info.net->users; - if (users.size() != 1) + if (users.entries() != 1) continue; CellInfo *driver_cell = driver.cell; diff --git a/fpga_interchange/globals.cc b/fpga_interchange/globals.cc index ed9f73a6..6efd1d89 100644 --- a/fpga_interchange/globals.cc +++ b/fpga_interchange/globals.cc @@ -41,8 +41,8 @@ struct GlobalVist // This is our main global routing implementation. It is used both to actually route globals; and also to discover if // global buffers have available short routes from their source for auto-placement -static int route_global_arc(Context *ctx, NetInfo *net, size_t usr_idx, size_t phys_port_idx, int max_hops, - bool dry_run) +static int route_global_arc(Context *ctx, NetInfo *net, store_index<PortRef> usr_idx, size_t phys_port_idx, + int max_hops, bool dry_run) { auto &usr = net->users.at(usr_idx); WireId src = ctx->getNetinfoSourceWire(net); @@ -51,7 +51,7 @@ static int route_global_arc(Context *ctx, NetInfo *net, size_t usr_idx, size_t p if (dry_run) return -1; else - log_error("Arc %d.%d (%s.%s) of net %s has no sink wire!\n", int(usr_idx), int(phys_port_idx), + log_error("Arc %d.%d (%s.%s) of net %s has no sink wire!\n", usr_idx.idx(), int(phys_port_idx), ctx->nameOf(usr.cell), ctx->nameOf(usr.port), ctx->nameOf(net)); } // Consider any existing routing put in place by the site router, etc @@ -188,14 +188,6 @@ void Arch::place_globals() // Ignore if there is no driver; or the driver is not placed if (net->driver.cell == nullptr || net->driver.cell->bel == BelId()) continue; - size_t user_idx = 0; - bool found_user = false; - for (user_idx = 0; user_idx < net->users.size(); user_idx++) - if (net->users.at(user_idx).cell == ci && net->users.at(user_idx).port == pin_name) { - found_user = true; - break; - } - NPNR_ASSERT(found_user); // TODO: substantial performance improvements are probably possible, although of questionable benefit given // the low number of globals in a typical device... @@ -213,7 +205,7 @@ void Arch::place_globals() if (!isBelLocationValid(bel)) goto fail; // Check distance - distance = route_global_arc(ctx, net, user_idx, 0, pin.max_hops, true); + distance = route_global_arc(ctx, net, port.user_idx, 0, pin.max_hops, true); if (distance != -1 && distance < shortest_distance) { best_bel = bel; shortest_distance = distance; @@ -262,16 +254,16 @@ void Arch::route_globals() int total_sinks = 0; int global_sinks = 0; - for (size_t i = 0; i < net->users.size(); i++) { - auto &usr = net->users.at(i); - for (size_t j = 0; j < ctx->getNetinfoSinkWireCount(net, usr); j++) { - int result = route_global_arc(ctx, net, i, j, pin.max_hops, false); + for (auto usr : net->users.enumerate()) { + for (size_t j = 0; j < ctx->getNetinfoSinkWireCount(net, usr.value); j++) { + int result = route_global_arc(ctx, net, usr.index, j, pin.max_hops, false); ++total_sinks; if (result != -1) ++global_sinks; if ((result == -1) && pin.force_routing) log_error("Failed to route arc %d.%d (%s.%s) of net %s using dedicated global routing!\n", - int(i), int(j), ctx->nameOf(usr.cell), ctx->nameOf(usr.port), ctx->nameOf(net)); + usr.index.idx(), int(j), ctx->nameOf(usr.value.cell), ctx->nameOf(usr.value.port), + ctx->nameOf(net)); } } diff --git a/frontend/frontend_base.h b/frontend/frontend_base.h index 8ac61bbe..a2ac219c 100644 --- a/frontend/frontend_base.h +++ b/frontend/frontend_base.h @@ -690,7 +690,7 @@ template <typename FrontendType> struct GenericFrontend // Combine users for (auto &usr : mergee->users) { usr.cell->ports[usr.port].net = base; - base->users.push_back(usr); + usr.cell->ports[usr.port].user_idx = base->users.add(usr); } // Point aliases to the new net for (IdString alias : mergee->aliases) { diff --git a/generic/cells.cc b/generic/cells.cc index 76d6474f..e4a24767 100644 --- a/generic/cells.cc +++ b/generic/cells.cc @@ -121,7 +121,7 @@ void nxio_to_iob(Context *ctx, CellInfo *nxio, CellInfo *iob, pool<IdString> &to tbuf->movePortTo(ctx->id("A"), iob, ctx->id("I")); tbuf->movePortTo(ctx->id("E"), iob, ctx->id("EN")); - if (donet->users.size() > 1) { + if (donet->users.entries() > 1) { for (auto user : donet->users) log_info(" remaining tristate user: %s.%s\n", user.cell->name.c_str(ctx), user.port.c_str(ctx)); log_error("unsupported tristate IO pattern for IO buffer '%s', " diff --git a/generic/family.cmake b/generic/family.cmake index cd4e3801..de4ce1af 100644 --- a/generic/family.cmake +++ b/generic/family.cmake @@ -1,4 +1,4 @@ -set(VIADUCT_UARCHES "example") +set(VIADUCT_UARCHES "example" "okami") foreach(uarch ${VIADUCT_UARCHES}) aux_source_directory(${family}/viaduct/${uarch} UARCH_FILES) foreach(target ${family_targets}) diff --git a/generic/pack.cc b/generic/pack.cc index cb3f5897..428d6642 100644 --- a/generic/pack.cc +++ b/generic/pack.cc @@ -124,9 +124,10 @@ static void set_net_constant(const Context *ctx, NetInfo *orig, NetInfo *constne log_info("%s user %s\n", orig->name.c_str(ctx), uc->name.c_str(ctx)); if ((is_lut(ctx, uc) || is_lc(ctx, uc)) && (user.port.str(ctx).at(0) == 'I') && !constval) { uc->ports[user.port].net = nullptr; + uc->ports[user.port].user_idx = {}; } else { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } } @@ -224,8 +225,8 @@ static void pack_io(Context *ctx) ci->type.c_str(ctx), ci->name.c_str(ctx)); NetInfo *net = iob->ports.at(ctx->id("PAD")).net; if (((ci->type == ctx->id("$nextpnr_ibuf") || ci->type == ctx->id("$nextpnr_iobuf")) && - net->users.size() > 1) || - (ci->type == ctx->id("$nextpnr_obuf") && (net->users.size() > 2 || net->driver.cell != nullptr))) + net->users.entries() > 1) || + (ci->type == ctx->id("$nextpnr_obuf") && (net->users.entries() > 2 || net->driver.cell != nullptr))) log_error("PAD of %s '%s' connected to more than a single top level IO.\n", iob->type.c_str(ctx), iob->name.c_str(ctx)); diff --git a/generic/viaduct/okami/constids.inc b/generic/viaduct/okami/constids.inc new file mode 100644 index 00000000..b40d5be8 --- /dev/null +++ b/generic/viaduct/okami/constids.inc @@ -0,0 +1,14 @@ +X(LUT4) +X(DFF) +X(CLK) +X(D) +X(F) +X(Q) +X(INBUF) +X(OUTBUF) +X(I) +X(EN) +X(O) +X(IOB) +X(PAD) +X(INIT) diff --git a/generic/viaduct/okami/okami.cc b/generic/viaduct/okami/okami.cc new file mode 100644 index 00000000..864bdb45 --- /dev/null +++ b/generic/viaduct/okami/okami.cc @@ -0,0 +1,546 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 gatecat <gatecat@ds0.me> + * Copyright (C) 2022 Lofty <dan.ravensloft@gmail.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "log.h" +#include "nextpnr.h" +#include "util.h" +#include "viaduct_api.h" +#include "viaduct_helpers.h" + +#define GEN_INIT_CONSTIDS +#define VIADUCT_CONSTIDS "viaduct/okami/constids.inc" +#include "viaduct_constids.h" + +NEXTPNR_NAMESPACE_BEGIN + +namespace { +struct OkamiImpl : ViaductAPI +{ + ~OkamiImpl(){}; + void init(Context *ctx) override + { + init_uarch_constids(ctx); + ViaductAPI::init(ctx); + h.init(ctx); + init_wires(); + init_bels(); + init_pips(); + } + + void pack() override + { + // Trim nextpnr IOBs - assume IO buffer insertion has been done in synthesis + const pool<CellTypePort> top_ports{ + CellTypePort(id_INBUF, id_PAD), + CellTypePort(id_OUTBUF, id_PAD), + }; + h.remove_nextpnr_iobs(top_ports); + // Replace constants with LUTs + const dict<IdString, Property> vcc_params = {{id_INIT, Property(0xFFFF, 16)}}; + const dict<IdString, Property> gnd_params = {{id_INIT, Property(0x0000, 16)}}; + h.replace_constants(CellTypePort(id_LUT4, id_F), CellTypePort(id_LUT4, id_F), vcc_params, gnd_params); + // Constrain directly connected LUTs and FFs together to use dedicated resources + int lutffs = h.constrain_cell_pairs(pool<CellTypePort>{{id_LUT4, id_F}}, pool<CellTypePort>{{id_DFF, id_D}}, 1, + false); + log_info("Constrained %d LUTFF pairs.\n", lutffs); + } + + void prePlace() override { assign_cell_info(); } + + bool isBelLocationValid(BelId bel) const override + { + Loc l = ctx->getBelLocation(bel); + if (is_io(l.x, l.y)) { + return true; + } else { + return slice_valid(l.x, l.y, l.z / 2); + } + } + + private: + ViaductHelpers h; + // Configuration + // Grid size including IOBs at edges + const int M = 32; + const int X = M, Y = M; + // SLICEs per tile + const int N = 8; + // LUT input count + const int K = 4; + // Number of tile input buses + const int InputMuxCount = 10; // >= 6 for attosoc; >= 10 for arbiter + // Number of output wires in a direction + const int OutputMuxCount = 8; // >= 5 for attosoc; >= 8 for arbiter + + // For fast wire lookups + struct TileWires + { + std::vector<WireId> clk, q, f, d; + std::vector<WireId> slice_inputs; + std::vector<WireId> slice_outputs; + std::vector<WireId> tile_inputs_north, tile_inputs_east, tile_inputs_south, tile_inputs_west; + std::vector<WireId> tile_outputs_north, tile_outputs_east, tile_outputs_south, tile_outputs_west; + std::vector<WireId> pad; + }; + + std::vector<std::vector<TileWires>> wires_by_tile; + + // Create wires to attach to bels and pips + void init_wires() + { + NPNR_ASSERT(X >= 3); + NPNR_ASSERT(Y >= 3); + NPNR_ASSERT(K >= 2); + NPNR_ASSERT(N >= 1); + NPNR_ASSERT(InputMuxCount >= OutputMuxCount); + + log_info("Creating wires...\n"); + wires_by_tile.resize(Y); + for (int y = 0; y < Y; y++) { + auto &row_wires = wires_by_tile.at(y); + row_wires.resize(X); + for (int x = 0; x < X; x++) { + auto &w = row_wires.at(x); + for (int z = 0; z < N; z++) { + // Clock input + w.clk.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("CLK%d", z))), ctx->id("CLK"), x, y)); + // FF input + w.d.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("D%d", z))), ctx->id("D"), x, y)); + // FF and LUT outputs + w.q.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("Q%d", z))), ctx->id("Q"), x, y)); + w.f.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("F%d", z))), ctx->id("F"), x, y)); + // LUT inputs + for (int i = 0; i < K; i++) + w.slice_inputs.push_back( + ctx->addWire(h.xy_id(x, y, ctx->id(stringf("L%dI%d", z, i))), ctx->id("I"), x, y)); + w.slice_outputs.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("SLICEOUT[%d]", z))), + ctx->id("SLICEOUT"), x, y)); + } + // Tile inputs + for (int tile_input = 0; tile_input < InputMuxCount; tile_input++) { + w.tile_inputs_north.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEINN[%d]", tile_input))), ctx->id("TILEINN"), x, y)); + w.tile_inputs_east.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEINE[%d]", tile_input))), ctx->id("TILEINE"), x, y)); + w.tile_inputs_south.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEINS[%d]", tile_input))), ctx->id("TILEINS"), x, y)); + w.tile_inputs_west.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEINW[%d]", tile_input))), ctx->id("TILEINW"), x, y)); + } + // Tile outputs + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) { + w.tile_outputs_north.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEOUTN[%d]", tile_output))), ctx->id("TILEOUTN"), x, y)); + w.tile_outputs_east.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEOUTE[%d]", tile_output))), ctx->id("TILEOUTE"), x, y)); + w.tile_outputs_south.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEOUTS[%d]", tile_output))), ctx->id("TILEOUTS"), x, y)); + w.tile_outputs_west.push_back(ctx->addWire( + h.xy_id(x, y, ctx->id(stringf("TILEOUTW[%d]", tile_output))), ctx->id("TILEOUTW"), x, y)); + } + // Pad wires for IO + if (is_io(x, y) && x != y) + for (int z = 0; z < 2; z++) + w.pad.push_back(ctx->addWire(h.xy_id(x, y, ctx->id(stringf("PAD%d", z))), id_PAD, x, y)); + } + } + } + bool is_io(int x, int y) const + { + // IO are on the edges of the device + return (x == 0) || (x == (X - 1)) || (y == 0) || (y == (Y - 1)); + } + // Create IO bels in an IO tile + void add_io_bels(int x, int y) + { + auto &w = wires_by_tile.at(y).at(x); + for (int z = 0; z < 2; z++) { + BelId b = ctx->addBel(h.xy_id(x, y, ctx->id(stringf("IO%d", z))), id_IOB, Loc(x, y, z), false, false); + ctx->addBelInout(b, id_PAD, w.pad.at(z)); + ctx->addBelInput(b, id_I, w.slice_inputs.at(z * K + 0)); + ctx->addBelInput(b, id_EN, w.slice_inputs.at(z * K + 1)); + ctx->addBelOutput(b, id_O, w.slice_outputs.at(z)); + } + } + PipId add_pip(Loc loc, WireId src, WireId dst, delay_t delay = 0.05) + { + IdStringList name = IdStringList::concat(ctx->getWireName(dst), ctx->getWireName(src)); + return ctx->addPip(name, ctx->id("PIP"), src, dst, delay, loc); + } + // Create LUT and FF bels in a logic tile + void add_slice_bels(int x, int y) + { + auto &w = wires_by_tile.at(y).at(x); + for (int z = 0; z < N; z++) { + // Create LUT bel + BelId lut = ctx->addBel(h.xy_id(x, y, ctx->id(stringf("SLICE%d_LUT", z))), id_LUT4, Loc(x, y, z * 2), false, + false); + for (int k = 0; k < K; k++) + ctx->addBelInput(lut, ctx->id(stringf("I[%d]", k)), w.slice_inputs.at(z * K + k)); + ctx->addBelOutput(lut, id_F, w.f.at(z)); + // FF data can come from LUT output or LUT I3 + add_pip(Loc(x, y, 0), w.f.at(z), w.d.at(z)); + add_pip(Loc(x, y, 0), w.slice_inputs.at(z * K + (K - 1)), w.d.at(z)); + // Create DFF bel + BelId dff = ctx->addBel(h.xy_id(x, y, ctx->id(stringf("SLICE%d_FF", z))), id_DFF, Loc(x, y, z * 2 + 1), + false, false); + ctx->addBelInput(dff, id_CLK, w.clk.at(z)); + ctx->addBelInput(dff, id_D, w.d.at(z)); + ctx->addBelOutput(dff, id_Q, w.q.at(z)); + } + } + // Create bels according to tile type + void init_bels() + { + log_info("Creating bels...\n"); + for (int y = 0; y < Y; y++) { + for (int x = 0; x < X; x++) { + if (is_io(x, y)) { + if (x == y) + continue; // don't put IO in corners + add_io_bels(x, y); + } else { + add_slice_bels(x, y); + } + } + } + } + + // Create PIPs inside a tile; following an example synthetic routing pattern + void add_io_pips(int x, int y) + { + auto &w = wires_by_tile.at(y).at(x); + Loc loc(x, y, 0); + + const uint16_t tile_input_config[8] = { + 0b0000'0000'0000'0001, 0b0000'0000'0000'0001, 0b0000'0000'0000'0001, 0b0000'0000'0000'0001, + 0b0000'0000'0000'0010, 0b0000'0000'0000'0010, 0b0000'0000'0000'0010, 0b0000'0000'0000'0010, + }; + + // Tile inputs + for (int tile_input = 0; tile_input < InputMuxCount; tile_input++) { + auto &dst = w.tile_inputs_north.at(tile_input); + // North + for (int step = 1; step <= 4; step++) { + if (y - step <= 0 || x == 0 || x == X - 1) + break; + auto &w = wires_by_tile.at(y - step).at(x); + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) + if ((1 << tile_input) & tile_input_config[tile_output]) + add_pip(loc, w.tile_outputs_north.at(tile_output), dst); + } + } + + for (int tile_input = 0; tile_input < InputMuxCount; tile_input++) { + auto &dst = w.tile_inputs_east.at(tile_input); + // East + for (int step = 1; step <= 4; step++) { + if (x - step <= 0 || y == 0 || y == Y - 1) + break; + auto &w = wires_by_tile.at(y).at(x - step); + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) + if ((1 << tile_input) & tile_input_config[tile_output]) + add_pip(loc, w.tile_outputs_east.at(tile_output), dst); + } + } + + for (int tile_input = 0; tile_input < InputMuxCount; tile_input++) { + auto &dst = w.tile_inputs_south.at(tile_input); + // South + for (int step = 1; step <= 4; step++) { + if (y + step >= Y || x == 0 || x == X - 1) + break; + auto &w = wires_by_tile.at(y + step).at(x); + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) + if ((1 << tile_input) & tile_input_config[tile_output]) + add_pip(loc, w.tile_outputs_south.at(tile_output), dst); + } + } + + for (int tile_input = 0; tile_input < InputMuxCount; tile_input++) { + auto &dst = w.tile_inputs_west.at(tile_input); + // West + for (int step = 1; step <= 4; step++) { + if (x + step >= X || y == 0 || y == Y - 1) + break; + auto &w = wires_by_tile.at(y).at(x + step); + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) + if ((1 << tile_input) & tile_input_config[tile_output]) + add_pip(loc, w.tile_outputs_west.at(tile_output), dst); + } + } + + // Tile outputs + for (int tile_output = 0; tile_output < OutputMuxCount; tile_output++) { + for (int z = 0; z < 2; z++) { + WireId src = w.slice_outputs.at(z); + // O output + if (y == 0) + add_pip(loc, src, w.tile_outputs_north.at(tile_output)); + if (x == 0) + add_pip(loc, src, w.tile_outputs_east.at(tile_output)); + if (y == Y - 1) + add_pip(loc, src, w.tile_outputs_south.at(tile_output)); + if (x == X - 1) + add_pip(loc, src, w.tile_outputs_west.at(tile_output)); + } + } + + // Pad inputs + for (const auto &src : w.tile_inputs_north) { + for (int z = 0; z < 2; z++) { + // I input + add_pip(loc, src, w.slice_inputs.at(z * K + 0)); + // EN input + add_pip(loc, src, w.slice_inputs.at(z * K + 1)); + } + } + for (const auto &src : w.tile_inputs_east) { + for (int z = 0; z < 2; z++) { + // I input + add_pip(loc, src, w.slice_inputs.at(z * K + 0)); + // EN input + add_pip(loc, src, w.slice_inputs.at(z * K + 1)); + } + } + for (const auto &src : w.tile_inputs_south) { + for (int z = 0; z < 2; z++) { + // I input + add_pip(loc, src, w.slice_inputs.at(z * K + 0)); + // EN input + add_pip(loc, src, w.slice_inputs.at(z * K + 1)); + } + } + for (const auto &src : w.tile_inputs_west) { + for (int z = 0; z < 2; z++) { + // I input + add_pip(loc, src, w.slice_inputs.at(z * K + 0)); + // EN input + add_pip(loc, src, w.slice_inputs.at(z * K + 1)); + } + } + } + void add_slice_pips(int x, int y) + { + auto &w = wires_by_tile.at(y).at(x); + Loc loc(x, y, 0); + + const uint16_t tile_input_config[8] = {0b1010'1010'1010'1010, 0b0101'0101'0101'0101, 0b0110'0110'0110'0110, + 0b1001'1001'1001'1001, 0b0011'0011'0011'0011, 0b1100'1100'1100'1100, + 0b1111'0000'1111'0000, 0b0000'1111'0000'1111}; + + // Slice input selector + for (int lut = 0; lut < N; lut++) { + for (int lut_input = 0; lut_input < K; lut_input++) { + for (const auto &tile_input : w.tile_inputs_north) // Tile input bus + add_pip(loc, tile_input, w.slice_inputs.at(lut * K + lut_input)); + for (const auto &tile_input : w.tile_inputs_east) // Tile input bus + add_pip(loc, tile_input, w.slice_inputs.at(lut * K + lut_input)); + for (const auto &tile_input : w.tile_inputs_south) // Tile input bus + add_pip(loc, tile_input, w.slice_inputs.at(lut * K + lut_input)); + for (const auto &tile_input : w.tile_inputs_west) // Tile input bus + add_pip(loc, tile_input, w.slice_inputs.at(lut * K + lut_input)); + for (const auto &slice_output : w.slice_outputs) // Slice output bus + add_pip(loc, slice_output, w.slice_inputs.at(lut * K + lut_input)); + } + for (const auto &tile_input : w.tile_inputs_north) // Clock selector + add_pip(loc, tile_input, w.clk.at(lut)); + for (const auto &tile_input : w.tile_inputs_east) // Clock selector + add_pip(loc, tile_input, w.clk.at(lut)); + for (const auto &tile_input : w.tile_inputs_south) // Clock selector + add_pip(loc, tile_input, w.clk.at(lut)); + for (const auto &tile_input : w.tile_inputs_west) // Clock selector + add_pip(loc, tile_input, w.clk.at(lut)); + } + + // Slice output selector + for (int slice_output = 0; slice_output < N; slice_output++) { + add_pip(loc, w.f.at(slice_output), w.slice_outputs.at(slice_output)); // LUT output + add_pip(loc, w.q.at(slice_output), w.slice_outputs.at(slice_output)); // DFF output + } + + // Tile input selector + for (int step = 1; step <= 4; step++) { + if (y + step < Y) // South + for (size_t tile_input_index = 0; tile_input_index < w.tile_inputs_north.size(); tile_input_index++) + for (size_t tile_output_index = 0; + tile_output_index < wires_by_tile.at(y + step).at(x).tile_outputs_south.size(); + tile_output_index++) + if ((1 << tile_input_index) & tile_input_config[tile_output_index]) + add_pip(loc, wires_by_tile.at(y + step).at(x).tile_outputs_south.at(tile_output_index), + w.tile_inputs_north.at(tile_input_index)); + + if (x + step < X) // West + for (size_t tile_input_index = 0; tile_input_index < w.tile_inputs_east.size(); tile_input_index++) + for (size_t tile_output_index = 0; + tile_output_index < wires_by_tile.at(y).at(x + step).tile_outputs_west.size(); + tile_output_index++) + if ((1 << tile_input_index) & tile_input_config[tile_output_index]) + add_pip(loc, wires_by_tile.at(y).at(x + step).tile_outputs_west.at(tile_output_index), + w.tile_inputs_east.at(tile_input_index)); + + if (y - step >= 0) // North + for (size_t tile_input_index = 0; tile_input_index < w.tile_inputs_south.size(); tile_input_index++) + for (size_t tile_output_index = 0; + tile_output_index < wires_by_tile.at(y - step).at(x).tile_outputs_north.size(); + tile_output_index++) + if ((1 << tile_input_index) & tile_input_config[tile_output_index]) + add_pip(loc, wires_by_tile.at(y - step).at(x).tile_outputs_north.at(tile_output_index), + w.tile_inputs_south.at(tile_input_index)); + + if (x - step >= 0) // East + for (size_t tile_input_index = 0; tile_input_index < w.tile_inputs_west.size(); tile_input_index++) + for (size_t tile_output_index = 0; + tile_output_index < wires_by_tile.at(y).at(x - step).tile_outputs_east.size(); + tile_output_index++) + if ((1 << tile_input_index) & tile_input_config[tile_output_index]) + add_pip(loc, wires_by_tile.at(y).at(x - step).tile_outputs_east.at(tile_output_index), + w.tile_inputs_west.at(tile_input_index)); + } + + // Tile output selector + for (const auto &slice_output : w.slice_outputs) { + for (const auto &tile_output : w.tile_outputs_north) + add_pip(loc, slice_output, tile_output); + for (const auto &tile_output : w.tile_outputs_east) + add_pip(loc, slice_output, tile_output); + for (const auto &tile_output : w.tile_outputs_south) + add_pip(loc, slice_output, tile_output); + for (const auto &tile_output : w.tile_outputs_west) + add_pip(loc, slice_output, tile_output); + } + + for (const auto &tile_input : w.tile_inputs_north) { + for (const auto &tile_output : w.tile_outputs_north) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_east) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_south) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_west) + add_pip(loc, tile_input, tile_output); + } + for (const auto &tile_input : w.tile_inputs_east) { + for (const auto &tile_output : w.tile_outputs_north) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_east) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_south) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_west) + add_pip(loc, tile_input, tile_output); + } + for (const auto &tile_input : w.tile_inputs_south) { + for (const auto &tile_output : w.tile_outputs_north) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_east) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_south) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_west) + add_pip(loc, tile_input, tile_output); + } + for (const auto &tile_input : w.tile_inputs_west) { + for (const auto &tile_output : w.tile_outputs_north) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_east) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_south) + add_pip(loc, tile_input, tile_output); + for (const auto &tile_output : w.tile_outputs_west) + add_pip(loc, tile_input, tile_output); + } + } + void init_pips() + { + log_info("Creating pips...\n"); + for (int y = 0; y < Y; y++) + for (int x = 0; x < X; x++) { + if (is_io(x, y)) { + add_io_pips(x, y); + } else { + add_slice_pips(x, y); + } + } + } + // Validity checking + struct OkamiCellInfo + { + const NetInfo *lut_f = nullptr, *ff_d = nullptr; + bool lut_i3_used = false; + }; + std::vector<OkamiCellInfo> fast_cell_info; + void assign_cell_info() + { + fast_cell_info.resize(ctx->cells.size()); + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + auto &fc = fast_cell_info.at(ci->flat_index); + if (ci->type == id_LUT4) { + fc.lut_f = ci->getPort(id_F); + fc.lut_i3_used = (ci->getPort(ctx->id(stringf("I[%d]", K - 1))) != nullptr); + } else if (ci->type == id_DFF) { + fc.ff_d = ci->getPort(id_D); + } + } + } + bool slice_valid(int x, int y, int z) const + { + const CellInfo *lut = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2))); + const CellInfo *ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2 + 1))); + if (!lut || !ff) + return true; // always valid if only LUT or FF used + const auto &lut_data = fast_cell_info.at(lut->flat_index); + const auto &ff_data = fast_cell_info.at(ff->flat_index); + // In our example arch; the FF D can either be driven from LUT F or LUT I3 + // so either; FF D must equal LUT F or LUT I3 must be unused + if (ff_data.ff_d == lut_data.lut_f && lut_data.lut_f->users.entries() == 1) + return true; + // Can't route FF and LUT output separately + return false; + } + // Bel bucket functions + IdString getBelBucketForCellType(IdString cell_type) const override + { + if (cell_type.in(id_INBUF, id_OUTBUF)) + return id_IOB; + return cell_type; + } + bool isValidBelForCellType(IdString cell_type, BelId bel) const override + { + IdString bel_type = ctx->getBelType(bel); + if (bel_type == id_IOB) + return cell_type.in(id_INBUF, id_OUTBUF); + else + return (bel_type == cell_type); + } +}; + +struct OkamiArch : ViaductArch +{ + OkamiArch() : ViaductArch("okami"){}; + std::unique_ptr<ViaductAPI> create(const dict<std::string, std::string> &args) + { + return std::make_unique<OkamiImpl>(); + } +} exampleArch; +} // namespace + +NEXTPNR_NAMESPACE_END diff --git a/generic/viaduct/okami/okami_map.v b/generic/viaduct/okami/okami_map.v new file mode 100644 index 00000000..f701f0ed --- /dev/null +++ b/generic/viaduct/okami/okami_map.v @@ -0,0 +1,12 @@ +module \$lut (A, Y); + parameter WIDTH = 0; + parameter LUT = 0; + input [WIDTH-1:0] A; + output Y; + + localparam rep = 1<<(4-WIDTH); + + LUT4 #(.INIT({rep{LUT}})) _TECHMAP_REPLACE_ (.I(A), .F(Y)); +endmodule + +module \$_DFF_P_ (input D, C, output Q); DFF _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C)); endmodule diff --git a/generic/viaduct/okami/okami_prims.v b/generic/viaduct/okami/okami_prims.v new file mode 100644 index 00000000..d2813129 --- /dev/null +++ b/generic/viaduct/okami/okami_prims.v @@ -0,0 +1,35 @@ +module LUT4 #( + parameter [15:0] INIT = 0 +) ( + input [3:0] I, + output F +); + wire [7:0] s3 = I[3] ? INIT[15:8] : INIT[7:0]; + wire [3:0] s2 = I[2] ? s3[ 7:4] : s3[3:0]; + wire [1:0] s1 = I[1] ? s2[ 3:2] : s2[1:0]; + assign F = I[0] ? s1[1] : s1[0]; +endmodule + +module DFF ( + input CLK, D, + output reg Q +); + initial Q = 1'b0; + always @(posedge CLK) + Q <= D; +endmodule + +module INBUF ( + input PAD, + output O, +); + assign O = PAD; +endmodule + +module OUTBUF ( + output PAD, + input I, +); + assign PAD = I; +endmodule + diff --git a/generic/viaduct/okami/synth_okami.tcl b/generic/viaduct/okami/synth_okami.tcl new file mode 100644 index 00000000..1a0212eb --- /dev/null +++ b/generic/viaduct/okami/synth_okami.tcl @@ -0,0 +1,24 @@ +# Usage +# tcl synth_okami.tcl {out.json} + +yosys read_verilog -lib [file dirname [file normalize $argv0]]/okami_prims.v +yosys hierarchy -check -top top +yosys proc +yosys flatten +yosys tribuf -logic +yosys deminout +yosys synth -run coarse +yosys memory_map +yosys opt -full +yosys iopadmap -bits -inpad INBUF O:PAD -outpad OUTBUF I:PAD +yosys techmap -map +/techmap.v +yosys opt -fast +yosys dfflegalize -cell \$_DFF_P_ 0 +yosys abc -lut 4 -dress +yosys clean +yosys techmap -map [file dirname [file normalize $argv0]]/okami_map.v +yosys clean +yosys hierarchy -check +yosys stat + +if {$argc > 0} { yosys write_json [lindex $argv 0] } diff --git a/generic/viaduct/okami/viaduct_example.sh b/generic/viaduct/okami/viaduct_example.sh new file mode 100755 index 00000000..5a49dc10 --- /dev/null +++ b/generic/viaduct/okami/viaduct_example.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -ex +# Run synthesis +yosys -p "tcl synth_okami.tcl blinky.json" ../../examples/blinky.v +# Run PnR +${NEXTPNR:-../../../build/nextpnr-generic} --uarch okami --json blinky.json --router router2 diff --git a/generic/viaduct_helpers.cc b/generic/viaduct_helpers.cc index 10c3b802..a92d0de1 100644 --- a/generic/viaduct_helpers.cc +++ b/generic/viaduct_helpers.cc @@ -96,7 +96,7 @@ int ViaductHelpers::constrain_cell_pairs(const pool<CellTypePort> &src_ports, co continue; if (!src_ports.count(CellTypePort(ci.type, port.first))) continue; - if (!allow_fanout && port.second.net->users.size() > 1) + if (!allow_fanout && port.second.net->users.entries() > 1) continue; for (auto &usr : port.second.net->users) { if (!sink_ports.count(CellTypePort(usr))) @@ -151,7 +151,7 @@ void ViaductHelpers::replace_constants(CellTypePort vcc_driver, CellTypePort gnd NetInfo *replace = (ni.driver.cell->type == ctx->id("VCC")) ? vcc_net : gnd_net; for (auto &usr : ni.users) { usr.cell->ports.at(usr.port).net = replace; - replace->users.push_back(usr); + usr.cell->ports.at(usr.port).user_idx = replace->users.add(usr); } trim_cells.push_back(ni.driver.cell->name); trim_nets.push_back(ni.name); diff --git a/gowin/main.cc b/gowin/main.cc index 38a67fcd..1473f3e8 100644 --- a/gowin/main.cc +++ b/gowin/main.cc @@ -55,7 +55,7 @@ po::options_description GowinCommandHandler::getArchOptions() std::unique_ptr<Context> GowinCommandHandler::createContext(dict<std::string, Property> &values) { - std::regex devicere = std::regex("GW1N(S?)[A-Z]*-(LV|UV|UX)([0-9])(C?).*"); + std::regex devicere = std::regex("GW1N([SZ]?)[A-Z]*-(LV|UV|UX)([0-9])(C?).*"); std::smatch match; std::string device = vm["device"].as<std::string>(); if (!std::regex_match(device, match, devicere)) { diff --git a/gowin/pack.cc b/gowin/pack.cc index cc715864..c17a20c7 100644 --- a/gowin/pack.cc +++ b/gowin/pack.cc @@ -68,7 +68,7 @@ static void pack_alus(Context *ctx) continue; } - if (!is_alu(ctx, cin_ci) || cin->users.size() > 1) { + if (!is_alu(ctx, cin_ci) || cin->users.entries() > 1) { if (ctx->verbose) { log_info("ALU head found %s. CIN net is %s\n", ctx->nameOf(ci), ctx->nameOf(cin)); } @@ -177,9 +177,9 @@ static void pack_alus(Context *ctx) new_cells.push_back(std::move(packed)); - if (cout != nullptr && cout->users.size() > 0) { + if (cout != nullptr && cout->users.entries() > 0) { // if COUT used by logic - if ((cout->users.size() > 1) || (!is_alu(ctx, cout->users.at(0).cell))) { + if ((cout->users.entries() > 1) || (!is_alu(ctx, (*cout->users.begin()).cell))) { if (ctx->verbose) { log_info("COUT is used by logic\n"); } @@ -204,7 +204,7 @@ static void pack_alus(Context *ctx) break; } // next ALU - ci = cout->users.at(0).cell; + ci = (*cout->users.begin()).cell; // if ALU is too big if (alu_idx == (ctx->gridDimX - 2) * 6 - 1) { log_error("ALU %s is the %dth in the chain. Such long chains are not supported.\n", ctx->nameOf(ci), @@ -596,9 +596,10 @@ static void set_net_constant(const Context *ctx, NetInfo *orig, NetInfo *constne log_info("%s user %s\n", ctx->nameOf(orig), ctx->nameOf(uc)); if ((is_lut(ctx, uc) || is_lc(ctx, uc)) && (user.port.str(ctx).at(0) == 'I') && !constval) { uc->ports[user.port].net = nullptr; + uc->ports[user.port].user_idx = {}; } else { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } } diff --git a/ice40/bitstream.cc b/ice40/bitstream.cc index 89f84262..48fbc132 100644 --- a/ice40/bitstream.cc +++ b/ice40/bitstream.cc @@ -1146,7 +1146,7 @@ bool read_asc(Context *ctx, std::istream &in) if (port.second.type == PORT_OUT) net->driver = ref; else - net->users.push_back(ref); + port.second.user_idx = net->users.add(ref); } } } diff --git a/ice40/cells.cc b/ice40/cells.cc index b5f759b2..9166b41b 100644 --- a/ice40/cells.cc +++ b/ice40/cells.cc @@ -466,7 +466,7 @@ void nxio_to_sb(Context *ctx, CellInfo *nxio, CellInfo *sbio, pool<IdString> &to tbuf->movePortTo(id_A, sbio, id_D_OUT_0); tbuf->movePortTo(id_E, sbio, id_OUTPUT_ENABLE); - if (donet->users.size() > 1) { + if (donet->users.entries() > 1) { for (auto user : donet->users) log_info(" remaining tristate user: %s.%s\n", user.cell->name.c_str(ctx), user.port.c_str(ctx)); log_error("unsupported tristate IO pattern for IO buffer '%s', " diff --git a/ice40/chains.cc b/ice40/chains.cc index e6a37044..6ea261de 100644 --- a/ice40/chains.cc +++ b/ice40/chains.cc @@ -73,8 +73,8 @@ class ChainConstrainer } else { NetInfo *carry_net = cell->ports.at(id_COUT).net; bool at_end = (curr_cell == carryc.cells.end() - 1); - if (carry_net != nullptr && (carry_net->users.size() > 1 || at_end)) { - if (carry_net->users.size() > 2 || + if (carry_net != nullptr && (carry_net->users.entries() > 1 || at_end)) { + if (carry_net->users.entries() > 2 || (net_only_drives(ctx, carry_net, is_lc, id_I3, false) != net_only_drives(ctx, carry_net, is_lc, id_CIN, false)) || (at_end && !net_only_drives(ctx, carry_net, is_lc, id_I3, true))) { @@ -116,15 +116,11 @@ class ChainConstrainer lc->ports.at(id_O).net = cout_port.net; NetInfo *co_i3_net = ctx->createNet(ctx->id(lc->name.str(ctx) + "$I3")); co_i3_net->driver = cout_port.net->driver; - PortRef i3_r; - i3_r.port = id_I3; - i3_r.cell = lc.get(); - co_i3_net->users.push_back(i3_r); + lc->connectPort(id_I3, co_i3_net); PortRef o_r; o_r.port = id_O; o_r.cell = lc.get(); cout_port.net->driver = o_r; - lc->ports.at(id_I3).net = co_i3_net; cout_port.net = co_i3_net; // If COUT also connects to a CIN; preserve the carry chain @@ -133,34 +129,21 @@ class ChainConstrainer // Connect I1 to 1 to preserve carry chain NetInfo *vcc = ctx->nets.at(ctx->id("$PACKER_VCC_NET")).get(); - lc->ports.at(id_I1).net = vcc; - PortRef i1_r; - i1_r.port = id_I1; - i1_r.cell = lc.get(); - vcc->users.push_back(i1_r); + lc->connectPort(id_I1, vcc); // Connect co_cin_net to the COUT of the LC - PortRef co_r; - co_r.port = id_COUT; - co_r.cell = lc.get(); - co_cin_net->driver = co_r; - lc->ports.at(id_COUT).net = co_cin_net; + lc->connectPort(id_COUT, co_cin_net); // Find the user corresponding to the next CIN int replaced_ports = 0; if (ctx->debug) log_info("cell: %s\n", cin_cell->name.c_str(ctx)); for (auto port : {id_CIN, id_I3}) { - auto &usr = lc->ports.at(id_O).net->users; - if (ctx->debug) - for (auto user : usr) - log_info("%s.%s\n", user.cell->name.c_str(ctx), user.port.c_str(ctx)); - auto fnd_user = std::find_if(usr.begin(), usr.end(), - [&](const PortRef &pr) { return pr.cell == cin_cell && pr.port == port; }); - if (fnd_user != usr.end()) { - co_cin_net->users.push_back(*fnd_user); - usr.erase(fnd_user); - cin_cell->ports.at(port).net = co_cin_net; + NetInfo *out_net = lc->ports.at(id_O).net; + auto &cin_p = cin_cell->ports.at(port); + if (cin_p.net == out_net) { + cin_cell->disconnectPort(port); + cin_cell->connectPort(port, co_cin_net); ++replaced_ports; } } @@ -181,30 +164,16 @@ class ChainConstrainer lc->params[id_CARRY_ENABLE] = Property::State::S1; lc->params[id_CIN_CONST] = Property::State::S1; lc->params[id_CIN_SET] = Property::State::S1; - lc->ports.at(id_I1).net = cin_port.net; - cin_port.net->users.erase(std::remove_if(cin_port.net->users.begin(), cin_port.net->users.end(), - [cin_cell, cin_port](const PortRef &usr) { - return usr.cell == cin_cell && usr.port == cin_port.name; - })); - PortRef i1_ref; - i1_ref.cell = lc.get(); - i1_ref.port = id_I1; - lc->ports.at(id_I1).net->users.push_back(i1_ref); + lc->connectPort(id_I1, cin_port.net); + cin_port.net->users.remove(cin_port.user_idx); NetInfo *out_net = ctx->createNet(ctx->id(lc->name.str(ctx) + "$O")); - PortRef drv_ref; - drv_ref.port = id_COUT; - drv_ref.cell = lc.get(); - out_net->driver = drv_ref; - lc->ports.at(id_COUT).net = out_net; + lc->connectPort(id_COUT, out_net); - PortRef usr_ref; - usr_ref.port = cin_port.name; - usr_ref.cell = cin_cell; - out_net->users.push_back(usr_ref); - cin_cell->ports.at(cin_port.name).net = out_net; + cin_port.net = nullptr; + cin_cell->connectPort(cin_port.name, out_net); IdString name = lc->name; ctx->assignCellInfo(lc.get()); diff --git a/ice40/pack.cc b/ice40/pack.cc index 4244f192..c32e346f 100644 --- a/ice40/pack.cc +++ b/ice40/pack.cc @@ -214,14 +214,14 @@ static void pack_carries(Context *ctx) PortRef pr; pr.cell = created_lc.get(); pr.port = id_I1; - i0_net->users.push_back(pr); + created_lc->ports.at(id_I1).user_idx = i0_net->users.add(pr); } created_lc->ports.at(id_I2).net = i1_net; if (i1_net) { PortRef pr; pr.cell = created_lc.get(); pr.port = id_I2; - i1_net->users.push_back(pr); + created_lc->ports.at(id_I2).user_idx = i1_net->users.add(pr); } new_cells.push_back(std::move(created_lc)); ++carry_only; @@ -230,16 +230,12 @@ static void pack_carries(Context *ctx) ci->movePortTo(id_CI, carry_lc, id_CIN); ci->movePortTo(id_CO, carry_lc, id_COUT); if (i0_net) { - auto &i0_usrs = i0_net->users; - i0_usrs.erase(std::remove_if(i0_usrs.begin(), i0_usrs.end(), [ci, ctx](const PortRef &pr) { - return pr.cell == ci && pr.port == id_I0; - })); + if (ci->ports.count(id_I0) && ci->ports.at(id_I0).user_idx) + i0_net->users.remove(ci->ports.at(id_I0).user_idx); } if (i1_net) { - auto &i1_usrs = i1_net->users; - i1_usrs.erase(std::remove_if(i1_usrs.begin(), i1_usrs.end(), [ci, ctx](const PortRef &pr) { - return pr.cell == ci && pr.port == id_I1; - })); + if (ci->ports.count(id_I1) && ci->ports.at(id_I1).user_idx) + i1_net->users.remove(ci->ports.at(id_I1).user_idx); } // Check for constant driver on CIN @@ -251,10 +247,7 @@ static void pack_carries(Context *ctx) cin_net == ctx->id("$PACKER_VCC_NET") ? Property::State::S1 : Property::State::S0; carry_lc->ports.at(id_CIN).net = nullptr; auto &cin_users = ctx->nets.at(cin_net)->users; - cin_users.erase( - std::remove_if(cin_users.begin(), cin_users.end(), [carry_lc, ctx](const PortRef &pr) { - return pr.cell == carry_lc && pr.port == id_CIN; - })); + cin_users.remove(carry_lc->ports.at(id_CIN).user_idx); } } exhausted_cells.insert(carry_lc->name); @@ -326,17 +319,20 @@ static void set_net_constant(const Context *ctx, NetInfo *orig, NetInfo *constne if ((is_lut(ctx, uc) || is_lc(ctx, uc) || is_carry(ctx, uc)) && (user.port.str(ctx).at(0) == 'I') && !constval) { uc->ports[user.port].net = nullptr; + uc->ports[user.port].user_idx = {}; } else if ((is_sb_mac16(ctx, uc) || uc->type == id_ICESTORM_DSP) && (user.port != id_CLK && ((constval && user.port == id_CE) || (!constval && user.port != id_CE)))) { uc->ports[user.port].net = nullptr; + uc->ports[user.port].user_idx = {}; } else if (is_ram(ctx, uc) && !constval && user.port != id_RCLK && user.port != id_RCLKN && user.port != id_WCLK && user.port != id_WCLKN && user.port != id_RCLKE && user.port != id_WCLKE) { uc->ports[user.port].net = nullptr; + uc->ports[user.port].user_idx = {}; } else { uc->ports[user.port].net = constnet; - constnet->users.push_back(user); + uc->ports[user.port].user_idx = constnet->users.add(user); } } } @@ -486,8 +482,8 @@ static void pack_io(Context *ctx) ci->type.c_str(ctx), ci->name.c_str(ctx)); NetInfo *net = sb->ports.at(id_PACKAGE_PIN).net; if (((ci->type == ctx->id("$nextpnr_ibuf") || ci->type == ctx->id("$nextpnr_iobuf")) && - net->users.size() > 1) || - (ci->type == ctx->id("$nextpnr_obuf") && (net->users.size() > 2 || net->driver.cell != nullptr))) + net->users.entries() > 1) || + (ci->type == ctx->id("$nextpnr_obuf") && (net->users.entries() > 2 || net->driver.cell != nullptr))) log_error("PACKAGE_PIN of %s '%s' connected to more than a single top level IO.\n", sb->type.c_str(ctx), sb->name.c_str(ctx)); @@ -531,9 +527,9 @@ static void pack_io(Context *ctx) sb->attrs[attr.first] = attr.second; } else if (is_sb_io(ctx, ci) || is_sb_gb_io(ctx, ci)) { NetInfo *net = ci->ports.at(id_PACKAGE_PIN).net; - if ((net != nullptr) && ((net->users.size() > 2) || + if ((net != nullptr) && ((net->users.entries() > 2) || (net->driver.cell != nullptr && - net->driver.cell->type == ctx->id("$nextpnr_obuf") && net->users.size() > 1))) + net->driver.cell->type == ctx->id("$nextpnr_obuf") && net->users.entries() > 1))) log_error("PACKAGE_PIN of %s '%s' connected to more than a single top level IO.\n", ci->type.c_str(ctx), ci->name.c_str(ctx)); } @@ -554,11 +550,11 @@ static void pack_io(Context *ctx) NetInfo *net_in0 = ci->ports.count(id_D_IN_0) ? ci->ports[id_D_IN_0].net : nullptr; NetInfo *net_in1 = ci->ports.count(id_D_IN_1) ? ci->ports[id_D_IN_1].net : nullptr; - if (net_in0 != nullptr && net_in0->users.size() == 0) { + if (net_in0 != nullptr && net_in0->users.entries() == 0) { delete_nets.insert(net_in0->name); ci->ports[id_D_IN_0].net = nullptr; } - if (net_in1 != nullptr && net_in1->users.size() == 0) { + if (net_in1 != nullptr && net_in1->users.entries() == 0) { delete_nets.insert(net_in1->name); ci->ports[id_D_IN_1].net = nullptr; } @@ -591,28 +587,23 @@ static void insert_global(Context *ctx, NetInfo *net, bool is_reset, bool is_cen std::string glb_name = net->name.str(ctx) + std::string("_$glb_") + (is_reset ? "sr" : (is_cen ? "ce" : "clk")); std::unique_ptr<CellInfo> gb = create_ice_cell(ctx, id_SB_GB, "$gbuf_" + glb_name); - gb->ports[id_USER_SIGNAL_TO_GLOBAL_BUFFER].net = net; - PortRef pr; - pr.cell = gb.get(); - pr.port = id_USER_SIGNAL_TO_GLOBAL_BUFFER; - net->users.push_back(pr); - - pr.cell = gb.get(); - pr.port = id_GLOBAL_BUFFER_OUTPUT; + gb->connectPort(id_USER_SIGNAL_TO_GLOBAL_BUFFER, net); NetInfo *glbnet = ctx->createNet(ctx->id(glb_name)); - glbnet->driver = pr; - gb->ports[id_GLOBAL_BUFFER_OUTPUT].net = glbnet; + gb->connectPort(id_GLOBAL_BUFFER_OUTPUT, glbnet); + std::vector<PortRef> keep_users; for (auto user : net->users) { if (is_clock_port(ctx, user) || (is_reset && is_reset_port(ctx, user)) || (is_cen && is_enable_port(ctx, user)) || (is_logic && is_logic_port(ctx, user))) { user.cell->ports[user.port].net = glbnet; - glbnet->users.push_back(user); + user.cell->ports[user.port].user_idx = glbnet->users.add(user); } else { keep_users.push_back(user); } } - net->users = keep_users; + net->users.clear(); + for (auto &user : keep_users) + user.cell->ports[user.port].user_idx = net->users.add(user); if (net->clkconstr) { glbnet->clkconstr = std::unique_ptr<ClockConstraint>(new ClockConstraint()); @@ -809,7 +800,7 @@ static void place_plls(Context *ctx) log_error("PLL '%s' has a PACKAGEPIN driven by an %s, should be directly connected to an input " "SB_IO.D_IN_0 port\n", ci->name.c_str(ctx), io_cell->type.c_str(ctx)); - if (ni->users.size() != 1) + if (ni->users.entries() != 1) log_error("PLL '%s' clock input '%s' can only drive PLL\n", ci->name.c_str(ctx), ni->name.c_str(ctx)); if (!io_cell->attrs.count(id_BEL)) log_error("PLL '%s' PACKAGEPIN SB_IO '%s' is unconstrained\n", ci->name.c_str(ctx), @@ -911,10 +902,10 @@ static void place_plls(Context *ctx) // Used global connections bool gb_a_used = ci->ports.count(id_PLLOUT_A_GLOBAL) && (ci->ports[id_PLLOUT_A_GLOBAL].net != nullptr) && - (ci->ports[id_PLLOUT_A_GLOBAL].net->users.size() > 0); + (ci->ports[id_PLLOUT_A_GLOBAL].net->users.entries() > 0); bool gb_b_used = is_sb_pll40_dual(ctx, ci) && ci->ports.count(id_PLLOUT_B_GLOBAL) && (ci->ports[id_PLLOUT_B_GLOBAL].net != nullptr) && - (ci->ports[id_PLLOUT_B_GLOBAL].net->users.size() > 0); + (ci->ports[id_PLLOUT_B_GLOBAL].net->users.entries() > 0); // Check for conflict BelPin pll_io_a, pll_io_b; @@ -954,15 +945,15 @@ static void place_plls(Context *ctx) // Used global connections bool gb_a_used = ci->ports.count(id_PLLOUT_A_GLOBAL) && (ci->ports[id_PLLOUT_A_GLOBAL].net != nullptr) && - (ci->ports[id_PLLOUT_A_GLOBAL].net->users.size() > 0); + (ci->ports[id_PLLOUT_A_GLOBAL].net->users.entries() > 0); bool gb_b_used = is_sb_pll40_dual(ctx, ci) && ci->ports.count(id_PLLOUT_B_GLOBAL) && (ci->ports[id_PLLOUT_B_GLOBAL].net != nullptr) && - (ci->ports[id_PLLOUT_B_GLOBAL].net->users.size() > 0); + (ci->ports[id_PLLOUT_B_GLOBAL].net->users.entries() > 0); // Could this be a PAD PLL ? bool could_be_pad = false; BelId pad_bel; - if (ni->users.size() == 1 && is_sb_io(ctx, ni->driver.cell) && ni->driver.cell->attrs.count(id_BEL)) + if (ni->users.entries() == 1 && is_sb_io(ctx, ni->driver.cell) && ni->driver.cell->attrs.count(id_BEL)) pad_bel = ctx->getBelByNameStr(ni->driver.cell->attrs[id_BEL].as_string()); // Find a BEL for it @@ -1035,7 +1026,7 @@ static std::unique_ptr<CellInfo> spliceLUT(Context *ctx, CellInfo *ci, IdString PortRef pr; pr.cell = user.cell; pr.port = user.port; - out_net->users.push_back(pr); + user.cell->ports[user.port].user_idx = out_net->users.add(pr); } // Add LUT to new users. @@ -1045,8 +1036,10 @@ static std::unique_ptr<CellInfo> spliceLUT(Context *ctx, CellInfo *ci, IdString new_users.push_back(pr); pt->ports.at(id_I3).net = port.net; - // Replace users of the original net. - port.net->users = new_users; + // Replace users of the original net + port.net->users.clear(); + for (auto &usr : new_users) + usr.cell->ports.at(usr.port).user_idx = port.net->users.add(usr); return pt; } @@ -1235,7 +1228,7 @@ static void pack_special(Context *ctx) if ((pi.name != id_RGB0) && (pi.name != id_RGB1) && (pi.name != id_RGB2)) continue; - if (net->users.size() > 0) + if (net->users.entries() > 0) log_error("SB_RGB_DRV/SB_RGBA_DRV port connected to more than just package pin !\n"); ctx->nets.erase(net->name); @@ -1585,7 +1578,7 @@ void pack_plls(Context *ctx) // Only if there is actually a net ... if (pi.net != nullptr) { // ... and it's used - if (pi.net->users.size() > 0) { + if (pi.net->users.entries() > 0) { std::unique_ptr<CellInfo> gb = create_padin_gbuf(ctx, packed.get(), pi.name, "$gbuf_" + ci->name.str(ctx) + "_pllout_" + (is_b_port ? "b" : "a")); diff --git a/machxo2/pack.cc b/machxo2/pack.cc index 5051a981..56efb63f 100644 --- a/machxo2/pack.cc +++ b/machxo2/pack.cc @@ -160,7 +160,7 @@ static void set_net_constant(Context *ctx, NetInfo *orig, NetInfo *constnet, boo uc->ports[user.port].net = constnet; } - constnet->users.push_back(user); + user.cell->ports.at(user.port).user_idx = constnet->users.add(user); } } orig->users.clear(); diff --git a/mistral/pack.cc b/mistral/pack.cc index c4b3afe3..703fa386 100644 --- a/mistral/pack.cc +++ b/mistral/pack.cc @@ -200,10 +200,10 @@ struct MistralPacker NetInfo *o = ci->getPort(id_O); if (o == nullptr) ; - else if (o->users.size() > 1) + else if (o->users.entries() > 1) log_error("Top level pin '%s' has multiple input buffers\n", ctx->nameOf(port.first)); - else if (o->users.size() == 1) - top_port = o->users.at(0); + else if (o->users.entries() == 1) + top_port = *o->users.begin(); } if (ci->type == ctx->id("$nextpnr_obuf") || ci->type == ctx->id("$nextpnr_iobuf")) { // Might have an output buffer (OB etc) connected to it @@ -215,9 +215,9 @@ struct MistralPacker top_port = i->driver; } // Edge case of a bidirectional buffer driving an output pin - if (i->users.size() > 2) { + if (i->users.entries() > 2) { log_error("Top level pin '%s' has illegal buffer configuration\n", ctx->nameOf(port.first)); - } else if (i->users.size() == 2) { + } else if (i->users.entries() == 2) { if (top_port.cell != nullptr) log_error("Top level pin '%s' has illegal buffer configuration\n", ctx->nameOf(port.first)); for (auto &usr : i->users) { @@ -300,9 +300,9 @@ struct MistralPacker const NetInfo *co = cursor->getPort(id_CO); if (co == nullptr || co->users.empty()) break; - if (co->users.size() > 1) + if (co->users.entries() > 1) log_error("Carry net %s has more than one sink!\n", ctx->nameOf(co)); - auto &usr = co->users.at(0); + auto &usr = *co->users.begin(); if (usr.port != id_CI) log_error("Carry net %s drives port %s, expected CI\n", ctx->nameOf(co), ctx->nameOf(usr.port)); cursor = usr.cell; diff --git a/nexus/global.cc b/nexus/global.cc index 1c763a31..31bf0a6b 100644 --- a/nexus/global.cc +++ b/nexus/global.cc @@ -72,7 +72,7 @@ struct NexusGlobalRouter // Dedicated backwards BFS routing for global networks template <typename Tfilt> - bool backwards_bfs_route(NetInfo *net, size_t user_idx, int iter_limit, bool strict, Tfilt pip_filter) + bool backwards_bfs_route(NetInfo *net, store_index<PortRef> user_idx, int iter_limit, bool strict, Tfilt pip_filter) { // Queue of wires to visit std::queue<WireId> visit; @@ -178,9 +178,9 @@ struct NexusGlobalRouter void route_clk_net(NetInfo *net) { - for (size_t i = 0; i < net->users.size(); i++) - backwards_bfs_route(net, i, 1000000, true, [&](PipId pip) { - return (is_relaxed_sink(net->users.at(i)) || global_pip_filter(pip)) && routeability_pip_filter(pip); + for (auto usr : net->users.enumerate()) + backwards_bfs_route(net, usr.index, 1000000, true, [&](PipId pip) { + return (is_relaxed_sink(usr.value) || global_pip_filter(pip)) && routeability_pip_filter(pip); }); log_info(" routed net '%s' using global resources\n", ctx->nameOf(net)); } diff --git a/nexus/pack.cc b/nexus/pack.cc index 5509d997..a769c05a 100644 --- a/nexus/pack.cc +++ b/nexus/pack.cc @@ -489,10 +489,10 @@ struct NexusPacker NetInfo *o = ci->getPort(id_O); if (o == nullptr) ; - else if (o->users.size() > 1) + else if (o->users.entries() > 1) log_error("Top level pin '%s' has multiple input buffers\n", ctx->nameOf(port.first)); - else if (o->users.size() == 1) - top_port = o->users.at(0); + else if (o->users.entries() == 1) + top_port = *o->users.begin(); } if (ci->type == ctx->id("$nextpnr_obuf") || ci->type == ctx->id("$nextpnr_iobuf")) { // Might have an output buffer (OB etc) connected to it @@ -504,9 +504,9 @@ struct NexusPacker top_port = i->driver; } // Edge case of a bidirectional buffer driving an output pin - if (i->users.size() > 2) { + if (i->users.entries() > 2) { log_error("Top level pin '%s' has illegal buffer configuration\n", ctx->nameOf(port.first)); - } else if (i->users.size() == 2) { + } else if (i->users.entries() == 2) { if (top_port.cell != nullptr) log_error("Top level pin '%s' has illegal buffer configuration\n", ctx->nameOf(port.first)); for (auto &usr : i->users) { @@ -834,13 +834,15 @@ struct NexusPacker for (auto &usr : net->users) { if (pred(usr)) { usr.cell->ports[usr.port].net = buffered_net; - buffered_net->users.push_back(usr); + usr.cell->ports[usr.port].user_idx = buffered_net->users.add(usr); } else { remaining_users.push_back(usr); } } - std::swap(net->users, remaining_users); + net->users.clear(); + for (auto &usr : remaining_users) + usr.cell->ports.at(usr.port).user_idx = net->users.add(usr); // Connect buffer input to original net buffer->connectPort(i, net); @@ -1195,11 +1197,11 @@ struct NexusPacker if (di != nullptr && di->driver.cell != nullptr) iob = di->driver.cell; NetInfo *dout = ci->getPort(id_DOUT); - if (dout != nullptr && dout->users.size() == 1) - iob = dout->users.at(0).cell; + if (dout != nullptr && dout->users.entries() == 1) + iob = (*dout->users.begin()).cell; NetInfo *tout = ci->getPort(id_TOUT); - if (tout != nullptr && tout->users.size() == 1) - iob = tout->users.at(0).cell; + if (tout != nullptr && tout->users.entries() == 1) + iob = (*tout->users.begin()).cell; if (iob == nullptr || (iob->type != id_SEIO18_CORE && iob->type != id_SEIO33_CORE && iob->type != id_DIFFIO18_CORE)) log_error("Failed to find associated IOB for IOLOGIC %s\n", ctx->nameOf(ci)); @@ -1358,11 +1360,12 @@ struct NexusPacker NetInfo *fco = combs[1]->getPort(id_FCO); ci = nullptr; if (fco != nullptr) { - if (fco->users.size() > 1) + if (fco->users.entries() > 1) log_error("Carry cell '%s' has multiple fanout on FCO\n", ctx->nameOf(combs[1])); - else if (fco->users.size() == 1) { - NPNR_ASSERT(fco->users.at(0).port == id_CIN); - ci = fco->users.at(0).cell; + else if (fco->users.entries() == 1) { + auto &u0 = *fco->users.begin(); + NPNR_ASSERT(u0.port == id_CIN); + ci = u0.cell; } } } while (ci != nullptr); @@ -2161,13 +2164,13 @@ struct NexusPacker isIDDR = true; } NetInfo *dout = iol->getPort(id_DOUT); - if (dout != nullptr && dout->users.size() == 1) { - iob = dout->users.at(0).cell; + if (dout != nullptr && dout->users.entries() == 1) { + iob = (*dout->users.begin()).cell; isODDR = true; } NetInfo *tout = iol->getPort(id_TOUT); - if (tout != nullptr && tout->users.size() == 1) { - iob = tout->users.at(0).cell; + if (tout != nullptr && tout->users.entries() == 1) { + iob = (*tout->users.begin()).cell; isODDR = true; // FIXME: Not sure } NPNR_ASSERT(iob != nullptr); @@ -2359,7 +2362,7 @@ struct NexusPacker } // Skip if there are multiple sinks on that net - if (di->users.size() != 1) { + if (di->users.entries() != 1) { continue; } |