diff options
Diffstat (limited to 'common')
| -rw-r--r-- | common/place/detail_place_cfg.h | 36 | ||||
| -rw-r--r-- | common/place/detail_place_core.cc | 457 | ||||
| -rw-r--r-- | common/place/detail_place_core.h | 224 | ||||
| -rw-r--r-- | common/place/parallel_refine.cc | 552 | ||||
| -rw-r--r-- | common/place/parallel_refine.h | 6 | 
5 files changed, 730 insertions, 545 deletions
| diff --git a/common/place/detail_place_cfg.h b/common/place/detail_place_cfg.h new file mode 100644 index 00000000..dadda0e8 --- /dev/null +++ b/common/place/detail_place_cfg.h @@ -0,0 +1,36 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2021-22  gatecat <gatecat@ds0.me> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef DETAIL_PLACE_CFG_H +#define DETAIL_PLACE_CFG_H + +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct DetailPlaceCfg +{ +    DetailPlaceCfg(Context *ctx); +    bool timing_driven; +    int hpwl_scale_x, hpwl_scale_y; +    float crit_exp = 8; +}; + +NEXTPNR_NAMESPACE_END +#endif diff --git a/common/place/detail_place_core.cc b/common/place/detail_place_core.cc new file mode 100644 index 00000000..bb383639 --- /dev/null +++ b/common/place/detail_place_core.cc @@ -0,0 +1,457 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2021-22  gatecat <gatecat@ds0.me> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "detail_place_core.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +DetailPlaceCfg::DetailPlaceCfg(Context *ctx) +{ +    timing_driven = ctx->setting<bool>("timing_driven"); + +    hpwl_scale_x = 1; +    hpwl_scale_y = 1; +} + +PlacePartition::PlacePartition(Context *ctx) +{ +    x0 = ctx->getGridDimX(); +    y0 = ctx->getGridDimY(); +    x1 = 0; +    y1 = 0; +    for (auto &cell : ctx->cells) { +        Loc l = ctx->getBelLocation(cell.second->bel); +        x0 = std::min(x0, l.x); +        x1 = std::max(x1, l.x); +        y0 = std::min(y0, l.y); +        y1 = std::max(y1, l.y); +        cells.push_back(cell.second.get()); +    } +} + +void PlacePartition::split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r) +{ +    std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) { +        Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel); +        return yaxis ? (l0.y < l1.y) : (l0.x < l1.x); +    }); +    size_t pivot_point = size_t(cells.size() * pivot); +    l.cells.clear(); +    r.cells.clear(); +    l.cells.reserve(pivot_point); +    r.cells.reserve(cells.size() - pivot_point); +    int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1) +                                         : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y +                                                  : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x); +    for (size_t i = 0; i < cells.size(); i++) { +        Loc loc = ctx->getBelLocation(cells.at(i)->bel); +        ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i)); +    } +    if (yaxis) { +        l.x0 = r.x0 = x0; +        l.x1 = r.x1 = x1; +        l.y0 = y0; +        l.y1 = pivot_coord; +        r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1); +        r.y1 = y1; +    } else { +        l.y0 = r.y0 = y0; +        l.y1 = r.y1 = y1; +        l.x0 = x0; +        l.x1 = pivot_coord; +        r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1); +        r.x1 = x1; +    } +} + +void DetailPlacerState::update_global_costs() +{ +    last_bounds.resize(flat_nets.size()); +    last_tmg_costs.resize(flat_nets.size()); +    total_wirelen = 0; +    total_timing_cost = 0; +    for (size_t i = 0; i < flat_nets.size(); i++) { +        NetInfo *ni = flat_nets.at(i); +        if (skip_net(ni)) +            continue; +        last_bounds.at(i) = NetBB::compute(ctx, ni); +        total_wirelen += last_bounds.at(i).hpwl(base_cfg); +        if (!timing_skip_net(ni)) { +            auto &tc = last_tmg_costs.at(i); +            tc.resize(ni->users.capacity()); +            for (auto usr : ni->users.enumerate()) { +                tc.at(usr.index.idx()) = get_timing_cost(ni, usr.index); +                total_timing_cost += tc.at(usr.index.idx()); +            } +        } +    } +} + +NetBB NetBB::compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel) +{ +    NetBB result{}; +    if (!net->driver.cell) +        return result; +    auto bel_loc = [&](const CellInfo *cell) { +        BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel; +        return ctx->getBelLocation(bel); +    }; +    result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1; +    Loc drv_loc = bel_loc(net->driver.cell); +    result.x0 = result.x1 = drv_loc.x; +    result.y0 = result.y1 = drv_loc.y; +    for (auto &usr : net->users) { +        Loc l = bel_loc(usr.cell); +        if (l.x == result.x0) +            ++result.nx0; // on the edge +        else if (l.x < result.x0) { +            result.x0 = l.x; // extends the edge +            result.nx0 = 1; +        } +        if (l.x == result.x1) +            ++result.nx1; // on the edge +        else if (l.x > result.x1) { +            result.x1 = l.x; // extends the edge +            result.nx1 = 1; +        } +        if (l.y == result.y0) +            ++result.ny0; // on the edge +        else if (l.y < result.y0) { +            result.y0 = l.y; // extends the edge +            result.ny0 = 1; +        } +        if (l.y == result.y1) +            ++result.ny1; // on the edge +        else if (l.y > result.y1) { +            result.y1 = l.y; // extends the edge +            result.ny1 = 1; +        } +    } +    return result; +} + +void DetailPlacerThreadState::set_partition(const PlacePartition &part) +{ +    p = part; +    thread_nets.clear(); +    thread_net_idx.resize(g.flat_nets.size()); +    std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1); +    // Determine the set of nets that are within the thread; and therefore we care about +    for (auto thread_cell : part.cells) { +        for (auto &port : thread_cell->ports) { +            if (!port.second.net) +                continue; +            int global_idx = port.second.net->udata; +            auto &thread_idx = thread_net_idx.at(global_idx); +            // Already added to the set +            if (thread_idx != -1) +                continue; +            thread_idx = thread_nets.size(); +            thread_nets.push_back(port.second.net); +        } +    } +    tmg_ignored_nets.clear(); +    ignored_nets.clear(); +    for (auto tn : thread_nets) { +        ignored_nets.push_back(g.skip_net(tn)); +        tmg_ignored_nets.push_back(g.timing_skip_net(tn)); +    } +    // Set up the original cell-bel map for all nets inside the thread +    local_cell2bel.clear(); +    for (NetInfo *net : thread_nets) { +        if (net->driver.cell) +            local_cell2bel[net->driver.cell->name] = net->driver.cell->bel; +        for (auto &usr : net->users) +            local_cell2bel[usr.cell->name] = usr.cell->bel; +    } +} + +void DetailPlacerThreadState::setup_initial_state() +{ +    // Setup initial net bounding boxes and timing costs +    net_bounds.clear(); +    arc_tmg_cost.clear(); +    for (auto tn : thread_nets) { +        net_bounds.push_back(g.last_bounds.at(tn->udata)); +        arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata)); +    } +    new_net_bounds = net_bounds; +    for (int j = 0; j < 2; j++) { +        auto &a = axes.at(j); +        a.already_bounds_changed.resize(net_bounds.size()); +    } +    already_timing_changed.clear(); +    already_timing_changed.resize(net_bounds.size()); +    for (size_t i = 0; i < thread_nets.size(); i++) +        already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity()); +} + +bool DetailPlacerThreadState::bounds_check(BelId bel) +{ +    Loc l = ctx->getBelLocation(bel); +    if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1) +        return false; +    return true; +} + +bool DetailPlacerThreadState::bind_move() +{ +#if !defined(__wasm) +    std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); +#endif +    for (auto &entry : moved_cells) { +        ctx->unbindBel(entry.second.first); +    } +    bool success = true; +    for (auto &entry : moved_cells) { +        // Make sure targets are available before we bind them +        if (!ctx->checkBelAvail(entry.second.second)) { +            success = false; +            break; +        } +        ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); +    } +    arch_state_dirty = true; +    return success; +} + +bool DetailPlacerThreadState::check_validity() +{ +#if !defined(__wasm) +    std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex); +#endif +    bool result = true; +    for (auto e : moved_cells) { +        if (!ctx->isBelLocationValid(e.second.first)) { +            // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer +            // enabling dedicated routes to be used +            result = false; +            break; +        } +        if (!ctx->isBelLocationValid(e.second.second)) { +            result = false; +            break; +        } +    } +    return result; +} + +void DetailPlacerThreadState::revert_move() +{ +    if (arch_state_dirty) { +        // If changes to the arch state were made, revert them by restoring original cell bindings +#if !defined(__wasm) +        std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); +#endif +        for (auto &entry : moved_cells) { +            BelId curr_bound = ctx->cells.at(entry.first)->bel; +            if (curr_bound != BelId()) +                ctx->unbindBel(curr_bound); +        } +        for (auto &entry : moved_cells) { +            ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); +        } +        arch_state_dirty = false; +    } +    for (auto &entry : moved_cells) +        local_cell2bel[entry.first] = entry.second.first; +} + +void DetailPlacerThreadState::commit_move() +{ +    arch_state_dirty = false; +    for (auto &axis : axes) { +        for (auto bc : axis.bounds_changed_nets) { +            // Commit updated net bounds +            net_bounds.at(bc) = new_net_bounds.at(bc); +        } +    } +    if (g.base_cfg.timing_driven) { +        NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size()); +        for (size_t i = 0; i < timing_changed_arcs.size(); i++) { +            auto arc = timing_changed_arcs.at(i); +            arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i); +        } +    } +} + +void DetailPlacerThreadState::compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel) +{ +    Loc new_loc = ctx->getBelLocation(new_bel); +    Loc old_loc = ctx->getBelLocation(old_bel); +    for (const auto &port : cell->ports) { +        NetInfo *pn = port.second.net; +        if (!pn) +            continue; +        int idx = thread_net_idx.at(pn->udata); +        if (ignored_nets.at(idx)) +            continue; +        NetBB &new_bounds = new_net_bounds.at(idx); +        // For the x-axis (i=0) and y-axis (i=1) +        for (int i = 0; i < 2; i++) { +            auto &axis = axes.at(i); +            // New and old on this axis +            int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x; +            // References to updated bounding box entries +            auto &b0 = i ? new_bounds.y0 : new_bounds.x0; +            auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0; +            auto &b1 = i ? new_bounds.y1 : new_bounds.x1; +            auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1; +            auto &change = axis.already_bounds_changed.at(idx); +            // Lower bound +            if (new_pos < b0) { +                // Further out than current lower bound +                b0 = new_pos; +                n0 = 1; +                if (change == NO_CHANGE) { +                    change = CELL_MOVED_OUTWARDS; +                    axis.bounds_changed_nets.push_back(idx); +                } +            } else if (new_pos == b0 && old_pos > b0) { +                // Moved from inside into current bound +                ++n0; +                if (change == NO_CHANGE) { +                    change = CELL_MOVED_OUTWARDS; +                    axis.bounds_changed_nets.push_back(idx); +                } +            } else if (old_pos == b0 && new_pos > b0) { +                // Moved from current bound to inside +                if (change == NO_CHANGE) +                    axis.bounds_changed_nets.push_back(idx); +                if (n0 == 1) { +                    // Was the last cell on the bound; have to do a full recompute +                    change = FULL_RECOMPUTE; +                } else { +                    --n0; +                    if (change == NO_CHANGE) +                        change = CELL_MOVED_INWARDS; +                } +            } +            // Upper bound +            if (new_pos > b1) { +                // Further out than current upper bound +                b1 = new_pos; +                n1 = new_pos; +                if (change == NO_CHANGE) { +                    change = CELL_MOVED_OUTWARDS; +                    axis.bounds_changed_nets.push_back(idx); +                } +            } else if (new_pos == b1 && old_pos < b1) { +                // Moved onto current bound +                ++n1; +                if (change == NO_CHANGE) { +                    change = CELL_MOVED_OUTWARDS; +                    axis.bounds_changed_nets.push_back(idx); +                } +            } else if (old_pos == b1 && new_pos < b1) { +                // Moved from current bound to inside +                if (change == NO_CHANGE) +                    axis.bounds_changed_nets.push_back(idx); +                if (n1 == 1) { +                    // Was the last cell on the bound; have to do a full recompute +                    change = FULL_RECOMPUTE; +                } else { +                    --n1; +                    if (change == NO_CHANGE) +                        change = CELL_MOVED_INWARDS; +                } +            } +        } +        // Timing updates if timing driven +        if (g.base_cfg.timing_driven && !tmg_ignored_nets.at(idx)) { +            if (port.second.type == PORT_OUT) { +                int cc; +                TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); +                if (cls != TMG_IGNORE) { +                    for (auto usr : pn->users.enumerate()) +                        if (!already_timing_changed.at(idx).at(usr.index.idx())) { +                            timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index)); +                            already_timing_changed.at(idx).at(usr.index.idx()) = true; +                        } +                } +            } else { +                auto usr = port.second.user_idx; +                if (!already_timing_changed.at(idx).at(usr.idx())) { +                    timing_changed_arcs.emplace_back(std::make_pair(idx, usr)); +                    already_timing_changed.at(idx).at(usr.idx()) = true; +                } +            } +        } +    } +} + +void DetailPlacerThreadState::compute_total_change() +{ +    auto &xa = axes.at(0), &ya = axes.at(1); +    for (auto &bc : xa.bounds_changed_nets) +        if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE) +            new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); +    for (auto &bc : ya.bounds_changed_nets) +        if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE) +            new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); +    for (auto &bc : xa.bounds_changed_nets) +        wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg)); +    for (auto &bc : ya.bounds_changed_nets) +        if (xa.already_bounds_changed.at(bc) == NO_CHANGE) +            wirelen_delta += (new_net_bounds.at(bc).hpwl(g.base_cfg) - net_bounds.at(bc).hpwl(g.base_cfg)); +    if (g.base_cfg.timing_driven) { +        NPNR_ASSERT(new_timing_costs.empty()); +        for (auto arc : timing_changed_arcs) { +            double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel); +            timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx())); +            new_timing_costs.push_back(new_cost); +        } +    } +} + +void DetailPlacerThreadState::reset_move_state() +{ +    moved_cells.clear(); +    cell_rel.clear(); +    for (auto &axis : axes) { +        for (auto bc : axis.bounds_changed_nets) { +            new_net_bounds.at(bc) = net_bounds.at(bc); +            axis.already_bounds_changed[bc] = NO_CHANGE; +        } +        axis.bounds_changed_nets.clear(); +    } +    for (auto &arc : timing_changed_arcs) { +        already_timing_changed.at(arc.first).at(arc.second.idx()) = false; +    } +    timing_changed_arcs.clear(); +    new_timing_costs.clear(); +    wirelen_delta = 0; +    timing_delta = 0; +} + +bool DetailPlacerThreadState::add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel) +{ +    if (!bounds_check(old_bel) || !bounds_check(new_bel)) +        return false; +    if (!ctx->isValidBelForCellType(cell->type, new_bel)) +        return false; +    NPNR_ASSERT(!moved_cells.count(cell->name)); +    moved_cells[cell->name] = std::make_pair(old_bel, new_bel); +    local_cell2bel[cell->name] = new_bel; +    compute_changes_for_cell(cell, old_bel, new_bel); +    return true; +} + +NEXTPNR_NAMESPACE_END diff --git a/common/place/detail_place_core.h b/common/place/detail_place_core.h new file mode 100644 index 00000000..1c280f24 --- /dev/null +++ b/common/place/detail_place_core.h @@ -0,0 +1,224 @@ +/* + *  nextpnr -- Next Generation Place and Route + * + *  Copyright (C) 2021-22  gatecat <gatecat@ds0.me> + * + *  Permission to use, copy, modify, and/or distribute this software for any + *  purpose with or without fee is hereby granted, provided that the above + *  copyright notice and this permission notice appear in all copies. + * + *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* +This provides core data structures for a thread-safe detail placer that needs to swap cells and evaluate the cost +changes of swaps. + +It works on a partition-based threading approach; although threading can be avoided by only instantiating one +per-thread structure and calling its methods from the main thread. + +Each thread's data includes its own local net indexing for nets inside the partition (which can overlap thread +boundaries); and its own local cell-to-bel mapping for any cells on those nets, so there are no races with moves +made by other threads. + +A move is an atomic transaction of updated cell to bel mappings inside a thread. The first step is to reset the +per-move structures; then to add all of the moved cells to the move with add_to_move. + +Evaluation of wirelength and timing changes of a move is done with compute_changes_for_cell and compute_total_change. + +bind_move will probationally bind the move using the arch API functions, acquiring a lock during this time to prevent +races on non-thread-safe arch implementations, returning true if the bind succeeded or false if something went wrong +and it should be aborted. check_validity must then be called to use the arch API validity check functions on the move. + +Finally if the move meets criteria and is accepted then commit_move marks it as committed, otherwise revert_move +aborts the entire move transaction. +*/ + +#ifndef DETAIL_PLACE_CORE_H +#define DETAIL_PLACE_CORE_H + +#include "nextpnr.h" + +#include "detail_place_cfg.h" +#include "fast_bels.h" +#include "timing.h" + +#include <queue> + +#if !defined(__wasm) +#include <shared_mutex> +#endif + +NEXTPNR_NAMESPACE_BEGIN + +struct PlacePartition +{ +    int x0, y0, x1, y1; +    std::vector<CellInfo *> cells; +    PlacePartition() = default; +    explicit PlacePartition(Context *ctx); +    void split(Context *ctx, bool yaxis, float pivot, PlacePartition &l, PlacePartition &r); +}; + +typedef int64_t wirelen_t; + +struct NetBB +{ +    // Actual bounding box +    int x0 = 0, x1 = 0, y0 = 0, y1 = 0; +    // Number of cells at each extremity +    int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; +    inline wirelen_t hpwl(const DetailPlaceCfg &cfg) const +    { +        return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); +    } +    static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr); +}; + +struct DetailPlacerState +{ +    explicit DetailPlacerState(Context *ctx, DetailPlaceCfg &cfg) +            : ctx(ctx), base_cfg(cfg), bels(ctx, false, 64), tmg(ctx){}; +    Context *ctx; +    DetailPlaceCfg &base_cfg; +    FastBels bels; +    std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index +    std::vector<NetBB> last_bounds; +    std::vector<std::vector<double>> last_tmg_costs; +    dict<IdString, NetBB> region_bounds; +    TimingAnalyser tmg; + +    wirelen_t total_wirelen = 0; +    double total_timing_cost = 0; + +#if !defined(__wasm) +    std::shared_timed_mutex archapi_mutex; +#endif + +    inline double get_timing_cost(const NetInfo *net, store_index<PortRef> user, +                                  const dict<IdString, BelId> *cell2bel = nullptr) +    { +        if (!net->driver.cell) +            return 0; +        const auto &sink = net->users.at(user); +        IdString driver_pin, sink_pin; +        // Pick the first pin for a prediction; assume all will be similar enouhg +        for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) { +            driver_pin = pin; +            break; +        } +        for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) { +            sink_pin = pin; +            break; +        } +        float crit = tmg.get_criticality(CellPortKey(sink)); +        BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel; +        BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel; +        double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin)); +        return delay * std::pow(crit, base_cfg.crit_exp); +    } + +    inline bool skip_net(const NetInfo *net) const +    { +        if (!net->driver.cell) +            return true; +        if (ctx->getBelGlobalBuf(net->driver.cell->bel)) +            return true; +        return false; +    } + +    inline bool timing_skip_net(const NetInfo *net) const +    { +        if (!net->driver.cell) +            return true; +        int cc; +        auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc); +        if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK) +            return true; +        return false; +    } + +    void update_global_costs(); +}; + +struct DetailPlacerThreadState +{ +    Context *ctx;         // Nextpnr context pointer +    DetailPlacerState &g; // Placer engine state +    int idx;              // Index of the thread +    DeterministicRNG rng; // Local RNG +    // The cell partition that the thread works on +    PlacePartition p; +    // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can +    // optimise +    std::vector<int> thread_net_idx; +    // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's +    // perspective +    std::vector<NetInfo *> thread_nets; +    std::vector<NetBB> net_bounds; +    std::vector<std::vector<double>> arc_tmg_cost; +    std::vector<bool> ignored_nets, tmg_ignored_nets; +    bool arch_state_dirty = false; +    // Our local cell-bel map; that won't be affected by out-of-partition moves +    dict<IdString, BelId> local_cell2bel; + +    // Data on an inflight move +    dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new) +    // For cluster moves only +    std::vector<std::pair<CellInfo *, Loc>> cell_rel; +    // For incremental wirelength and delay updates +    wirelen_t wirelen_delta = 0; +    double timing_delta = 0; +    // Wirelen related are handled on a per-axis basis to reduce +    enum BoundChange +    { +        NO_CHANGE, +        CELL_MOVED_INWARDS, +        CELL_MOVED_OUTWARDS, +        FULL_RECOMPUTE +    }; +    struct AxisChanges +    { +        std::vector<int> bounds_changed_nets; +        std::vector<BoundChange> already_bounds_changed; +    }; +    std::array<AxisChanges, 2> axes; +    std::vector<NetBB> new_net_bounds; + +    std::vector<std::vector<bool>> already_timing_changed; +    std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs; +    std::vector<double> new_timing_costs; + +    DetailPlacerThreadState(Context *ctx, DetailPlacerState &g, int idx) : ctx(ctx), g(g), idx(idx){}; +    void set_partition(const PlacePartition &part); +    void setup_initial_state(); +    bool bounds_check(BelId bel); + +    // Reset the inflight move state +    void reset_move_state(); +    // Add a cell change to the move +    bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel); +    // For an inflight move; attempt to actually apply the changes to the arch API +    bool bind_move(); +    // Checks if the arch API bel validity for a move is accepted +    bool check_validity(); +    // Undo any changes relating to an inflight move +    void revert_move(); +    // Mark the inflight move as complete and update cost structures +    void commit_move(); +    // Update the inflight cost change structures for a given cell moe +    void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel); +    // Update the total cost change for an inflight move +    void compute_total_change(); +}; + +NEXTPNR_NAMESPACE_END + +#endif diff --git a/common/place/parallel_refine.cc b/common/place/parallel_refine.cc index a868ca58..a2de5b13 100644 --- a/common/place/parallel_refine.cc +++ b/common/place/parallel_refine.cc @@ -22,9 +22,8 @@  #if !defined(__wasm) -#include "fast_bels.h" +#include "detail_place_core.h"  #include "scope_lock.h" -#include "timing.h"  #include <chrono>  #include <mutex> @@ -35,515 +34,24 @@  NEXTPNR_NAMESPACE_BEGIN  namespace { -struct Partition -{ -    int x0, y0, x1, y1; -    std::vector<CellInfo *> cells; -    Partition() = default; -    explicit Partition(Context *ctx) -    { -        x0 = ctx->getGridDimX(); -        y0 = ctx->getGridDimY(); -        x1 = 0; -        y1 = 0; -        for (auto &cell : ctx->cells) { -            Loc l = ctx->getBelLocation(cell.second->bel); -            x0 = std::min(x0, l.x); -            x1 = std::max(x1, l.x); -            y0 = std::min(y0, l.y); -            y1 = std::max(y1, l.y); -            cells.push_back(cell.second.get()); -        } -    } -    void split(Context *ctx, bool yaxis, float pivot, Partition &l, Partition &r) -    { -        std::sort(cells.begin(), cells.end(), [&](CellInfo *a, CellInfo *b) { -            Loc l0 = ctx->getBelLocation(a->bel), l1 = ctx->getBelLocation(b->bel); -            return yaxis ? (l0.y < l1.y) : (l0.x < l1.x); -        }); -        size_t pivot_point = size_t(cells.size() * pivot); -        l.cells.clear(); -        r.cells.clear(); -        l.cells.reserve(pivot_point); -        r.cells.reserve(cells.size() - pivot_point); -        int pivot_coord = (pivot_point == 0) ? (yaxis ? y1 : x1) -                                             : (yaxis ? ctx->getBelLocation(cells.at(pivot_point - 1)->bel).y -                                                      : ctx->getBelLocation(cells.at(pivot_point - 1)->bel).x); -        for (size_t i = 0; i < cells.size(); i++) { -            Loc loc = ctx->getBelLocation(cells.at(i)->bel); -            ((yaxis ? loc.y : loc.x) <= pivot_coord ? l.cells : r.cells).push_back(cells.at(i)); -        } -        if (yaxis) { -            l.x0 = r.x0 = x0; -            l.x1 = r.x1 = x1; -            l.y0 = y0; -            l.y1 = pivot_coord; -            r.y0 = (pivot_coord == y1) ? y1 : (pivot_coord + 1); -            r.y1 = y1; -        } else { -            l.y0 = r.y0 = y0; -            l.y1 = r.y1 = y1; -            l.x0 = x0; -            l.x1 = pivot_coord; -            r.x0 = (pivot_coord == x1) ? x1 : (pivot_coord + 1); -            r.x1 = x1; -        } -    } -}; -typedef int64_t wirelen_t; - -struct NetBB +struct GlobalState : DetailPlacerState  { -    // Actual bounding box -    int x0 = 0, x1 = 0, y0 = 0, y1 = 0; -    // Number of cells at each extremity -    int nx0 = 0, nx1 = 0, ny0 = 0, ny1 = 0; -    wirelen_t hpwl(const ParallelRefineCfg &cfg) const -    { -        return wirelen_t(cfg.hpwl_scale_x * (x1 - x0) + cfg.hpwl_scale_y * (y1 - y0)); -    } -    static NetBB compute(const Context *ctx, const NetInfo *net, const dict<IdString, BelId> *cell2bel = nullptr) -    { -        NetBB result{}; -        if (!net->driver.cell) -            return result; -        auto bel_loc = [&](const CellInfo *cell) { -            BelId bel = cell2bel ? cell2bel->at(cell->name) : cell->bel; -            return ctx->getBelLocation(bel); -        }; -        result.nx0 = result.nx1 = result.ny0 = result.ny1 = 1; -        Loc drv_loc = bel_loc(net->driver.cell); -        result.x0 = result.x1 = drv_loc.x; -        result.y0 = result.y1 = drv_loc.y; -        for (auto &usr : net->users) { -            Loc l = bel_loc(usr.cell); -            if (l.x == result.x0) -                ++result.nx0; // on the edge -            else if (l.x < result.x0) { -                result.x0 = l.x; // extends the edge -                result.nx0 = 1; -            } -            if (l.x == result.x1) -                ++result.nx1; // on the edge -            else if (l.x > result.x1) { -                result.x1 = l.x; // extends the edge -                result.nx1 = 1; -            } -            if (l.y == result.y0) -                ++result.ny0; // on the edge -            else if (l.y < result.y0) { -                result.y0 = l.y; // extends the edge -                result.ny0 = 1; -            } -            if (l.y == result.y1) -                ++result.ny1; // on the edge -            else if (l.y > result.y1) { -                result.y1 = l.y; // extends the edge -                result.ny1 = 1; -            } -        } -        return result; -    } -}; +    explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : DetailPlacerState(ctx, this->cfg), cfg(cfg){}; -struct GlobalState -{ -    explicit GlobalState(Context *ctx, ParallelRefineCfg cfg) : ctx(ctx), cfg(cfg), bels(ctx, false, 64), tmg(ctx){}; -    Context *ctx;      ParallelRefineCfg cfg; -    FastBels bels; -    std::vector<NetInfo *> flat_nets; // flat array of all nets in the design for fast referencing by index -    std::vector<NetBB> last_bounds; -    std::vector<std::vector<double>> last_tmg_costs; -    dict<IdString, NetBB> region_bounds; -    TimingAnalyser tmg; - -    std::shared_timed_mutex archapi_mutex; -      double temperature = 1e-7;      int radius = 3; - -    wirelen_t total_wirelen = 0; -    double total_timing_cost = 0; - -    double get_timing_cost(const NetInfo *net, store_index<PortRef> user, -                           const dict<IdString, BelId> *cell2bel = nullptr) -    { -        if (!net->driver.cell) -            return 0; -        const auto &sink = net->users.at(user); -        IdString driver_pin, sink_pin; -        // Pick the first pin for a prediction; assume all will be similar enouhg -        for (auto pin : ctx->getBelPinsForCellPin(net->driver.cell, net->driver.port)) { -            driver_pin = pin; -            break; -        } -        for (auto pin : ctx->getBelPinsForCellPin(sink.cell, sink.port)) { -            sink_pin = pin; -            break; -        } -        float crit = tmg.get_criticality(CellPortKey(sink)); -        BelId src_bel = cell2bel ? cell2bel->at(net->driver.cell->name) : net->driver.cell->bel; -        BelId dst_bel = cell2bel ? cell2bel->at(sink.cell->name) : sink.cell->bel; -        double delay = ctx->getDelayNS(ctx->predictDelay(src_bel, driver_pin, dst_bel, sink_pin)); -        return delay * std::pow(crit, cfg.crit_exp); -    } - -    bool skip_net(const NetInfo *net) const -    { -        if (!net->driver.cell) -            return true; -        if (ctx->getBelGlobalBuf(net->driver.cell->bel)) -            return true; -        return false; -    } -    bool timing_skip_net(const NetInfo *net) const -    { -        if (!net->driver.cell) -            return true; -        int cc; -        auto cls = ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc); -        if (cls == TMG_IGNORE || cls == TMG_GEN_CLOCK) -            return true; -        return false; -    }      // ....  }; -struct ThreadState +struct ThreadState : DetailPlacerThreadState  { -    Context *ctx;         // Nextpnr context pointer -    GlobalState &g;       // Refinement engine state -    int idx;              // Index of the thread -    DeterministicRNG rng; // Local RNG -    // The cell partition that the thread works on -    Partition p; -    // Mapping from design-wide net index to thread-wide net index -- not all nets are in all partitions, so we can -    // optimise -    std::vector<int> thread_net_idx; -    // List of nets inside the partition; and their committed bounding boxes & timing costs from the thread's -    // perspective -    std::vector<NetInfo *> thread_nets; -    std::vector<NetBB> net_bounds; -    std::vector<std::vector<double>> arc_tmg_cost; -    std::vector<bool> ignored_nets, tmg_ignored_nets; -    bool arch_state_dirty = false; -    // Our local cell-bel map; that won't be affected by out-of-partition moves -    dict<IdString, BelId> local_cell2bel; - -    // Data on an inflight move -    dict<IdString, std::pair<BelId, BelId>> moved_cells; // cell -> (old; new) -    // For cluster moves only -    std::vector<std::pair<CellInfo *, Loc>> cell_rel; -    // For incremental wirelength and delay updates -    wirelen_t wirelen_delta = 0; -    double timing_delta = 0; - +    ThreadState(Context *ctx, GlobalState &g, int idx) : DetailPlacerThreadState(ctx, g, idx), g(g){};      // Total made and accepted moved +    GlobalState &g;      int n_move = 0, n_accept = 0; -    enum BoundChange -    { -        NO_CHANGE, -        CELL_MOVED_INWARDS, -        CELL_MOVED_OUTWARDS, -        FULL_RECOMPUTE -    }; -    // Wirelen related are handled on a per-axis basis to reduce -    struct AxisChanges -    { -        std::vector<int> bounds_changed_nets; -        std::vector<BoundChange> already_bounds_changed; -    }; -    std::array<AxisChanges, 2> axes; -    std::vector<NetBB> new_net_bounds; - -    std::vector<std::vector<bool>> already_timing_changed; -    std::vector<std::pair<int, store_index<PortRef>>> timing_changed_arcs; -    std::vector<double> new_timing_costs; - -    ThreadState(Context *ctx, GlobalState &g, int idx) : ctx(ctx), g(g), idx(idx){}; -    void set_partition(const Partition &part) -    { -        p = part; -        thread_nets.clear(); -        thread_net_idx.resize(g.flat_nets.size()); -        std::fill(thread_net_idx.begin(), thread_net_idx.end(), -1); -        // Determine the set of nets that are within the thread; and therefore we care about -        for (auto thread_cell : part.cells) { -            for (auto &port : thread_cell->ports) { -                if (!port.second.net) -                    continue; -                int global_idx = port.second.net->udata; -                auto &thread_idx = thread_net_idx.at(global_idx); -                // Already added to the set -                if (thread_idx != -1) -                    continue; -                thread_idx = thread_nets.size(); -                thread_nets.push_back(port.second.net); -            } -        } -        tmg_ignored_nets.clear(); -        ignored_nets.clear(); -        for (auto tn : thread_nets) { -            ignored_nets.push_back(g.skip_net(tn)); -            tmg_ignored_nets.push_back(g.timing_skip_net(tn)); -        } -        // Set up the original cell-bel map for all nets inside the thread -        local_cell2bel.clear(); -        for (NetInfo *net : thread_nets) { -            if (net->driver.cell) -                local_cell2bel[net->driver.cell->name] = net->driver.cell->bel; -            for (auto &usr : net->users) -                local_cell2bel[usr.cell->name] = usr.cell->bel; -        } -    } -    void setup_initial_state() -    { -        // Setup initial net bounding boxes and timing costs -        net_bounds.clear(); -        arc_tmg_cost.clear(); -        for (auto tn : thread_nets) { -            net_bounds.push_back(g.last_bounds.at(tn->udata)); -            arc_tmg_cost.push_back(g.last_tmg_costs.at(tn->udata)); -        } -        new_net_bounds = net_bounds; -        for (int j = 0; j < 2; j++) { -            auto &a = axes.at(j); -            a.already_bounds_changed.resize(net_bounds.size()); -        } -        already_timing_changed.clear(); -        already_timing_changed.resize(net_bounds.size()); -        for (size_t i = 0; i < thread_nets.size(); i++) -            already_timing_changed.at(i) = std::vector<bool>(thread_nets.at(i)->users.capacity()); -    } -    bool bounds_check(BelId bel) -    { -        Loc l = ctx->getBelLocation(bel); -        if (l.x < p.x0 || l.x > p.x1 || l.y < p.y0 || l.y > p.y1) -            return false; -        return true; -    } -    bool bind_move() -    { -        std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); -        for (auto &entry : moved_cells) { -            ctx->unbindBel(entry.second.first); -        } -        bool success = true; -        for (auto &entry : moved_cells) { -            // Make sure targets are available before we bind them -            if (!ctx->checkBelAvail(entry.second.second)) { -                success = false; -                break; -            } -            ctx->bindBel(entry.second.second, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); -        } -        arch_state_dirty = true; -        return success; -    } -    bool check_validity() -    { -        std::shared_lock<std::shared_timed_mutex> l(g.archapi_mutex); -        bool result = true; -        for (auto e : moved_cells) { -            if (!ctx->isBelLocationValid(e.second.first)) { -                // Have to check old; too; as unbinding a bel could make a placement illegal by virtue of no longer -                // enabling dedicated routes to be used -                result = false; -                break; -            } -            if (!ctx->isBelLocationValid(e.second.second)) { -                result = false; -                break; -            } -        } -        return result; -    } -    void revert_move() -    { -        if (arch_state_dirty) { -            // If changes to the arch state were made, revert them by restoring original cell bindings -            std::unique_lock<std::shared_timed_mutex> l(g.archapi_mutex); -            for (auto &entry : moved_cells) { -                BelId curr_bound = ctx->cells.at(entry.first)->bel; -                if (curr_bound != BelId()) -                    ctx->unbindBel(curr_bound); -            } -            for (auto &entry : moved_cells) { -                ctx->bindBel(entry.second.first, ctx->cells.at(entry.first).get(), STRENGTH_WEAK); -            } -            arch_state_dirty = false; -        } -        for (auto &entry : moved_cells) -            local_cell2bel[entry.first] = entry.second.first; -    } -    void commit_move() -    { -        arch_state_dirty = false; -        for (auto &axis : axes) { -            for (auto bc : axis.bounds_changed_nets) { -                // Commit updated net bounds -                net_bounds.at(bc) = new_net_bounds.at(bc); -            } -        } -        if (g.cfg.timing_driven) { -            NPNR_ASSERT(timing_changed_arcs.size() == new_timing_costs.size()); -            for (size_t i = 0; i < timing_changed_arcs.size(); i++) { -                auto arc = timing_changed_arcs.at(i); -                arc_tmg_cost.at(arc.first).at(arc.second.idx()) = new_timing_costs.at(i); -            } -        } -    } -    void compute_changes_for_cell(CellInfo *cell, BelId old_bel, BelId new_bel) -    { -        Loc new_loc = ctx->getBelLocation(new_bel); -        Loc old_loc = ctx->getBelLocation(old_bel); -        for (const auto &port : cell->ports) { -            NetInfo *pn = port.second.net; -            if (!pn) -                continue; -            int idx = thread_net_idx.at(pn->udata); -            if (ignored_nets.at(idx)) -                continue; -            NetBB &new_bounds = new_net_bounds.at(idx); -            // For the x-axis (i=0) and y-axis (i=1) -            for (int i = 0; i < 2; i++) { -                auto &axis = axes.at(i); -                // New and old on this axis -                int new_pos = i ? new_loc.y : new_loc.x, old_pos = i ? old_loc.y : old_loc.x; -                // References to updated bounding box entries -                auto &b0 = i ? new_bounds.y0 : new_bounds.x0; -                auto &n0 = i ? new_bounds.ny0 : new_bounds.nx0; -                auto &b1 = i ? new_bounds.y1 : new_bounds.x1; -                auto &n1 = i ? new_bounds.ny1 : new_bounds.nx1; -                auto &change = axis.already_bounds_changed.at(idx); -                // Lower bound -                if (new_pos < b0) { -                    // Further out than current lower bound -                    b0 = new_pos; -                    n0 = 1; -                    if (change == NO_CHANGE) { -                        change = CELL_MOVED_OUTWARDS; -                        axis.bounds_changed_nets.push_back(idx); -                    } -                } else if (new_pos == b0 && old_pos > b0) { -                    // Moved from inside into current bound -                    ++n0; -                    if (change == NO_CHANGE) { -                        change = CELL_MOVED_OUTWARDS; -                        axis.bounds_changed_nets.push_back(idx); -                    } -                } else if (old_pos == b0 && new_pos > b0) { -                    // Moved from current bound to inside -                    if (change == NO_CHANGE) -                        axis.bounds_changed_nets.push_back(idx); -                    if (n0 == 1) { -                        // Was the last cell on the bound; have to do a full recompute -                        change = FULL_RECOMPUTE; -                    } else { -                        --n0; -                        if (change == NO_CHANGE) -                            change = CELL_MOVED_INWARDS; -                    } -                } -                // Upper bound -                if (new_pos > b1) { -                    // Further out than current upper bound -                    b1 = new_pos; -                    n1 = new_pos; -                    if (change == NO_CHANGE) { -                        change = CELL_MOVED_OUTWARDS; -                        axis.bounds_changed_nets.push_back(idx); -                    } -                } else if (new_pos == b1 && old_pos < b1) { -                    // Moved onto current bound -                    ++n1; -                    if (change == NO_CHANGE) { -                        change = CELL_MOVED_OUTWARDS; -                        axis.bounds_changed_nets.push_back(idx); -                    } -                } else if (old_pos == b1 && new_pos < b1) { -                    // Moved from current bound to inside -                    if (change == NO_CHANGE) -                        axis.bounds_changed_nets.push_back(idx); -                    if (n1 == 1) { -                        // Was the last cell on the bound; have to do a full recompute -                        change = FULL_RECOMPUTE; -                    } else { -                        --n1; -                        if (change == NO_CHANGE) -                            change = CELL_MOVED_INWARDS; -                    } -                } -            } -            // Timing updates if timing driven -            if (g.cfg.timing_driven && !tmg_ignored_nets.at(idx)) { -                if (port.second.type == PORT_OUT) { -                    int cc; -                    TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); -                    if (cls != TMG_IGNORE) { -                        for (auto usr : pn->users.enumerate()) -                            if (!already_timing_changed.at(idx).at(usr.index.idx())) { -                                timing_changed_arcs.emplace_back(std::make_pair(idx, usr.index)); -                                already_timing_changed.at(idx).at(usr.index.idx()) = true; -                            } -                    } -                } else { -                    auto usr = port.second.user_idx; -                    if (!already_timing_changed.at(idx).at(usr.idx())) { -                        timing_changed_arcs.emplace_back(std::make_pair(idx, usr)); -                        already_timing_changed.at(idx).at(usr.idx()) = true; -                    } -                } -            } -        } -    } -    void compute_total_change() -    { -        auto &xa = axes.at(0), &ya = axes.at(1); -        for (auto &bc : xa.bounds_changed_nets) -            if (xa.already_bounds_changed.at(bc) == FULL_RECOMPUTE) -                new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); -        for (auto &bc : ya.bounds_changed_nets) -            if (xa.already_bounds_changed.at(bc) != FULL_RECOMPUTE && -                ya.already_bounds_changed.at(bc) == FULL_RECOMPUTE) -                new_net_bounds.at(bc) = NetBB::compute(ctx, thread_nets.at(bc), &local_cell2bel); -        for (auto &bc : xa.bounds_changed_nets) -            wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); -        for (auto &bc : ya.bounds_changed_nets) -            if (xa.already_bounds_changed.at(bc) == NO_CHANGE) -                wirelen_delta += (new_net_bounds.at(bc).hpwl(g.cfg) - net_bounds.at(bc).hpwl(g.cfg)); -        if (g.cfg.timing_driven) { -            NPNR_ASSERT(new_timing_costs.empty()); -            for (auto arc : timing_changed_arcs) { -                double new_cost = g.get_timing_cost(thread_nets.at(arc.first), arc.second, &local_cell2bel); -                timing_delta += (new_cost - arc_tmg_cost.at(arc.first).at(arc.second.idx())); -                new_timing_costs.push_back(new_cost); -            } -        } -    } -    void reset_move_state() -    { -        moved_cells.clear(); -        cell_rel.clear(); -        for (auto &axis : axes) { -            for (auto bc : axis.bounds_changed_nets) { -                new_net_bounds.at(bc) = net_bounds.at(bc); -                axis.already_bounds_changed[bc] = NO_CHANGE; -            } -            axis.bounds_changed_nets.clear(); -        } -        for (auto &arc : timing_changed_arcs) { -            already_timing_changed.at(arc.first).at(arc.second.idx()) = false; -        } -        timing_changed_arcs.clear(); -        new_timing_costs.clear(); -        wirelen_delta = 0; -        timing_delta = 0; -    } -      bool accept_move()      {          static constexpr double epsilon = 1e-20; @@ -553,19 +61,6 @@ struct ThreadState                 (g.temperature > 1e-8 && (rng.rng() / float(0x3fffffff)) <= std::exp(-delta / g.temperature));      } -    bool add_to_move(CellInfo *cell, BelId old_bel, BelId new_bel) -    { -        if (!bounds_check(old_bel) || !bounds_check(new_bel)) -            return false; -        if (!ctx->isValidBelForCellType(cell->type, new_bel)) -            return false; -        NPNR_ASSERT(!moved_cells.count(cell->name)); -        moved_cells[cell->name] = std::make_pair(old_bel, new_bel); -        local_cell2bel[cell->name] = new_bel; -        compute_changes_for_cell(cell, old_bel, new_bel); -        return true; -    } -      bool single_cell_swap(CellInfo *cell, BelId new_bel)      {          NPNR_ASSERT(moved_cells.empty()); @@ -806,14 +301,14 @@ struct ParallelRefine              g.bels.addCellType(cell_type);          }      }; -    std::vector<Partition> parts; +    std::vector<PlacePartition> parts;      void do_partition()      {          parts.clear();          parts.emplace_back(ctx);          bool yaxis = false;          while (parts.size() < t.size()) { -            std::vector<Partition> next(parts.size() * 2); +            std::vector<PlacePartition> next(parts.size() * 2);              for (size_t i = 0; i < parts.size(); i++) {                  // Randomly permute pivot every iteration so we get different thread boundaries                  const float delta = 0.1; @@ -834,28 +329,6 @@ struct ParallelRefine              w.join();      } -    void update_global_costs() -    { -        g.last_bounds.resize(g.flat_nets.size()); -        g.last_tmg_costs.resize(g.flat_nets.size()); -        g.total_wirelen = 0; -        g.total_timing_cost = 0; -        for (size_t i = 0; i < g.flat_nets.size(); i++) { -            NetInfo *ni = g.flat_nets.at(i); -            if (g.skip_net(ni)) -                continue; -            g.last_bounds.at(i) = NetBB::compute(ctx, ni); -            g.total_wirelen += g.last_bounds.at(i).hpwl(g.cfg); -            if (!g.timing_skip_net(ni)) { -                auto &tc = g.last_tmg_costs.at(i); -                tc.resize(ni->users.capacity()); -                for (auto usr : ni->users.enumerate()) { -                    tc.at(usr.index.idx()) = g.get_timing_cost(ni, usr.index); -                    g.total_timing_cost += tc.at(usr.index.idx()); -                } -            } -        } -    }      void run()      { @@ -868,7 +341,7 @@ struct ParallelRefine          log_info("Running parallel refinement with %d threads.\n", int(t.size()));          int iter = 1;          bool done = false; -        update_global_costs(); +        g.update_global_costs();          double avg_wirelen = g.total_wirelen;          wirelen_t min_wirelen = g.total_wirelen;          while (true) { @@ -914,7 +387,7 @@ struct ParallelRefine              for (auto &w : workers)                  w.join();              g.tmg.run(); -            update_global_costs(); +            g.update_global_costs();              iter++;              ctx->yield();          } @@ -924,17 +397,14 @@ struct ParallelRefine  };  } // namespace -ParallelRefineCfg::ParallelRefineCfg(Context *ctx) +ParallelRefineCfg::ParallelRefineCfg(Context *ctx) : DetailPlaceCfg(ctx)  { -    timing_driven = ctx->setting<bool>("timing_driven");      threads = ctx->setting<int>("threads", 8);      // snap to nearest power of two; and minimum thread size      int actual_threads = 1;      while ((actual_threads * 2) <= threads && (int(ctx->cells.size()) / (actual_threads * 2)) >= min_thread_size)          actual_threads *= 2;      threads = actual_threads; -    hpwl_scale_x = 1; -    hpwl_scale_y = 1;  }  bool parallel_refine(Context *ctx, ParallelRefineCfg cfg) diff --git a/common/place/parallel_refine.h b/common/place/parallel_refine.h index 556317cd..a6c8b469 100644 --- a/common/place/parallel_refine.h +++ b/common/place/parallel_refine.h @@ -20,18 +20,16 @@  #ifndef PARALLEL_REFINE_H  #define PARALLEL_REFINE_H +#include "detail_place_cfg.h"  #include "nextpnr.h"  NEXTPNR_NAMESPACE_BEGIN -struct ParallelRefineCfg +struct ParallelRefineCfg : DetailPlaceCfg  {      ParallelRefineCfg(Context *ctx); -    bool timing_driven;      int threads; -    int hpwl_scale_x, hpwl_scale_y;      double lambda = 0.5f; -    float crit_exp = 8;      int inner_iters = 15;      int min_thread_size = 500;  }; | 
