From 4d2906378f36cd0131fc1a8dd30ad40980d4c0bb Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 11 Jan 2019 11:59:34 +0000 Subject: HeAP: Region finder for spreading and strict legaliser Signed-off-by: David Shah --- common/placer_heap.cc | 490 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 454 insertions(+), 36 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index d3ba63bc..6cd0459d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -26,6 +26,7 @@ #include #include +#include #include #include "log.h" #include "nextpnr.h" @@ -125,12 +126,12 @@ class HeAPPlacer for (int i = 0; i < 20; i++) { setup_solve_cells(); - EquationSystem esx(place_cells.size(), place_cells.size()); + EquationSystem esx(solve_cells.size(), solve_cells.size()); build_equations(esx, false); // log_info("x-axis\n"); solve_equations(esx, false); - EquationSystem esy(place_cells.size(), place_cells.size()); + EquationSystem esy(solve_cells.size(), solve_cells.size()); build_equations(esy, true); // log_info("y-axis\n"); solve_equations(esy, true); @@ -141,6 +142,48 @@ class HeAPPlacer log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); } + // legalise_with_cuts(true); + CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); + NPNR_ASSERT(false); + + bool valid = false; + wirelen_t solved_hpwl = 0, legal_hpwl = 1, best_hpwl = std::numeric_limits::max(); + int iter = 0, stalled = 0; + while (!valid || (stalled < 5 && (solved_hpwl < legal_hpwl * 0.8))) { + if ((solved_hpwl < legal_hpwl * 0.8) || (stalled > 5)) { + stalled = 0; + best_hpwl = std::numeric_limits::max(); + valid = true; + } + setup_solve_cells(); + + EquationSystem esx(solve_cells.size(), solve_cells.size()); + build_equations(esx, false, iter); + // log_info("x-axis\n"); + solve_equations(esx, false); + + EquationSystem esy(solve_cells.size(), solve_cells.size()); + build_equations(esy, true, iter); + // log_info("y-axis\n"); + solve_equations(esy, true); + solved_hpwl = total_hpwl(); + log_info("Solved HPWL = %d\n", int(solved_hpwl)); + + update_all_chains(); + legalise_placement_simple(valid); + update_all_chains(); + + legal_hpwl = total_hpwl(); + log_info("Legalised HPWL = %d\n", int(legal_hpwl)); + if (legal_hpwl < best_hpwl) { + best_hpwl = legal_hpwl; + stalled = 0; + } else { + ++stalled; + } + ctx->yield(); + ++iter; + } ctx->unlock(); return true; } @@ -177,7 +220,8 @@ class HeAPPlacer // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent // where chains are within chains - std::unordered_map chain_root; + std::unordered_map chain_root; + std::unordered_map chain_size; // The offset from chain_root to a cell in the chain std::unordered_map> cell_offsets; @@ -267,22 +311,22 @@ class HeAPPlacer // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once // another row/col is already recorded as being nearer for (int x = loc.x; x <= max_x; x++) { - if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (x - loc.x)) + if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (x - loc.x)) break; nc.at(x) = loc.x; } for (int x = loc.x - 1; x >= 0; x--) { - if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (loc.x - x)) + if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (loc.x - x)) break; nc.at(x) = loc.x; } for (int y = loc.y; y <= max_y; y++) { - if (nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (y - loc.y)) + if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (y - loc.y)) break; nr.at(y) = loc.y; } for (int y = loc.y - 1; y >= 0; y--) { - if (nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (loc.y - y)) + if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (loc.y - y)) break; nr.at(y) = loc.y; } @@ -343,7 +387,8 @@ class HeAPPlacer } // Setup the cells to be solved, returns the number of rows - int setup_solve_cells(std::unordered_set *celltypes = nullptr) { + int setup_solve_cells(std::unordered_set *celltypes = nullptr) + { int row = 0; solve_cells.clear(); // First clear the udata of all cells @@ -367,6 +412,7 @@ class HeAPPlacer { const auto &base = cell_locs[cell->name]; for (auto child : cell->constr_children) { + chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) cell_locs[child->name].x = base.x + child->constr_x; else @@ -385,6 +431,7 @@ class HeAPPlacer void update_all_chains() { for (auto cell : place_cells) { + chain_size[cell->name] = 1; if (!cell->constr_children.empty()) update_chain(cell, cell); } @@ -400,7 +447,7 @@ class HeAPPlacer } // Build the system of equations for either X or Y - void build_equations(EquationSystem &es, bool yaxis) + void build_equations(EquationSystem &es, bool yaxis, int iter = -1) { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; @@ -443,7 +490,9 @@ class HeAPPlacer es.add_rhs(row, -v_pos * weight); } if (cell_offsets.count(var.cell->name)) { - es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second : cell_offsets.at(var.cell->name).first) * weight); + es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second + : cell_offsets.at(var.cell->name).first) * + weight); } }; @@ -465,33 +514,20 @@ class HeAPPlacer stamp_equation(port, *other, -weight); stamp_equation(*other, *other, weight); stamp_equation(*other, port, -weight); - -/* - if (port.cell->udata != -1) { - es.add_coeff(port.cell->udata, port.cell->udata, weight); - if (!cell_locs.at(other->cell->name).locked) - es.add_coeff(other->cell->udata, port.cell->udata, -weight); - } else { - // Add our fixed position to the other end's RHS - if (!cell_locs.at(other->cell->name).locked) - es.add_rhs(other->cell->udata, this_pos * weight); - } - // Opposite for the other end of the connection - if (!cell_locs.at(other->cell->name).locked) { - es.add_coeff(other->cell->udata, other->cell->udata, weight); - if (!cell_locs.at(port.cell->name).locked) - es.add_coeff(port.cell->udata, other->cell->udata, -weight); - } else { - // Add our fixed position to the other end's RHS - if (!cell_locs.at(port.cell->name).locked) - es.add_rhs(port.cell->udata, this_pos * weight); - } -*/ }; process_arc(lbport); process_arc(ubport); }); } + if (iter != -1) { + const float alpha = 0.3; + float weight = alpha * iter; + for (size_t row = 0; row < solve_cells.size(); row++) { + // Add an arc from legalised to current position + es.add_coeff(row, row, weight); + es.add_rhs(row, weight * cell_pos(solve_cells.at(row))); + } + } } // Build the system of equations for either X or Y @@ -499,15 +535,14 @@ class HeAPPlacer { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; - build_equations(es, yaxis); std::vector vals; - std::transform(place_cells.begin(), place_cells.end(), std::back_inserter(vals), cell_pos); + std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); es.solve(vals); for (size_t i = 0; i < vals.size(); i++) if (yaxis) - cell_locs.at(place_cells.at(i)->name).y = int(vals.at(i) + 0.5); + cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i) + 0.5))); else - cell_locs.at(place_cells.at(i)->name).x = int(vals.at(i) + 0.5); + cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i) + 0.5))); } // Compute HPWL @@ -534,6 +569,389 @@ class HeAPPlacer return hpwl; } + // Swap the Bel of a cell with another, return the original location + BelId swap_cell_bels(CellInfo *cell, BelId newBel) + { + BelId oldBel = cell->bel; + CellInfo *bound = ctx->getBoundBelCell(newBel); + if (bound != nullptr) + ctx->unbindBel(newBel); + ctx->unbindBel(oldBel); + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + if (bound != nullptr) + ctx->bindBel(oldBel, bound, STRENGTH_WEAK); + return oldBel; + } + + // Placement legalisation + // Note that there are *two meanings* of legalisation in nextpnr placement + // The first kind, as in HeAP, simply ensures that there is no overlap (each Bel maps only to one cell) + // The second kind also ensures that validity rules (isValidBelForCell) are met, because there is no guarantee + // in nextpnr that Bels are freely swappable (indeed many a architectures Bel is a logic cell with complex + // validity rules for control sets, etc, rather than a CLB/tile as in a more conventional pack&place flow) + void legalise_placement_simple(bool require_validity = false) + { + // Unbind all cells placed in this solution + for (auto cell : sorted(ctx->cells)) { + CellInfo *ci = cell.second; + if (ci->udata != dont_solve && ci->bel != BelId()) + ctx->unbindBel(ci->bel); + } + + // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using + // the simple greedy largest-macro-first approach. + std::priority_queue> remaining; + for (auto cell : solve_cells) { + remaining.emplace(chain_size[cell->name], cell->name); + } + + while (!remaining.empty()) { + auto top = remaining.top(); + remaining.pop(); + + CellInfo *ci = ctx->cells.at(top.second).get(); + // Was now placed, ignore + if (ci->bel != BelId()) + continue; + // log_info(" Legalising %s\n", top.second.c_str(ctx)); + int bt = std::get<0>(bel_types.at(ci->type)); + auto &fb = fast_bels.at(bt); + int radius = 0; + int iter = 0; + bool placed = false; + while (!placed) { + + int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); + int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); + + iter++; + if ((iter % (20 * (radius + 1))) == 0) + radius = std::min(std::max(max_x, max_y), radius + 1); + + if (nx < 0 || nx > max_x) + continue; + if (ny < 0 || ny > max_x) + continue; + + // ny = nearest_row_with_bel.at(bt).at(ny); + // nx = nearest_col_with_bel.at(bt).at(nx); + + if (nx >= int(fb.size())) + continue; + if (ny >= int(fb.at(nx).size())) + continue; + if (fb.at(nx).at(ny).empty()) + continue; + + if (ci->constr_children.empty()) { + for (auto sz : fb.at(nx).at(ny)) { + if (ctx->checkBelAvail(sz) || radius > (max_x / 4)) { + CellInfo *bound = ctx->getBoundBelCell(sz); + if (bound != nullptr) { + if (bound->constr_parent != nullptr || !bound->constr_children.empty()) + continue; + ctx->unbindBel(bound->bel); + remaining.emplace(chain_size[bound->name], bound->name); + } + ctx->bindBel(sz, ci, STRENGTH_WEAK); + if (require_validity && !ctx->isBelLocationValid(sz)) { + ctx->unbindBel(sz); + if (bound != nullptr) + ctx->bindBel(sz, bound, STRENGTH_WEAK); + } else { + Loc loc = ctx->getBelLocation(sz); + cell_locs[ci->name].x = loc.x; + cell_locs[ci->name].y = loc.y; + placed = true; + break; + } + } + } + } else { + // FIXME + NPNR_ASSERT(false); + } + } + } + } + + static constexpr float beta = 0.9; + + struct ChainExtent + { + int x0, y0, x1, y1; + }; + + struct LegaliserRegion + { + int id; + int x0, y0, x1, y1; + int cells, bels; + std::unordered_set included_chains; + bool overused() const + { + if (bels < 4) + return cells > bels; + else + return cells > beta * bels; + } + }; + + class CutLegaliser + { + public: + CutLegaliser(HeAPPlacer *p, IdString beltype) + : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) + { + } + + void run() + { + init(); + find_overused_regions(); + expand_regions(); + for (auto &r : regions) { + if (!merged_regions.count(r.id)) + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); + } + } + + private: + HeAPPlacer *p; + Context *ctx; + IdString beltype; + std::vector> occupancy; + std::vector> groups; + std::vector> chaines; + std::vector>> &fb; + + std::vector regions; + std::unordered_set merged_regions; + + int occ_at(int x, int y) { return occupancy.at(x).at(y); } + + int bels_at(int x, int y) + { + if (x >= int(fb.size()) || y >= int(fb.at(x).size())) + return 0; + return int(fb.at(x).at(y).size()); + } + + void init() + { + occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); + groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); + chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); + + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + occupancy.at(x).at(y) = 0; + groups.at(x).at(y) = -1; + chaines.at(x).at(y) = {x, y, x, y}; + } + + std::map cr_extents; + + auto set_chain_ext = [&](IdString cell, int x, int y) { + if (!cr_extents.count(cell)) + cr_extents[cell] = {x, y, x, y}; + else { + cr_extents[cell].x0 = std::min(cr_extents[cell].x0, x); + cr_extents[cell].y0 = std::min(cr_extents[cell].y0, y); + cr_extents[cell].x1 = std::max(cr_extents[cell].x1, x); + cr_extents[cell].y1 = std::max(cr_extents[cell].y1, y); + } + }; + + for (auto &cell : p->cell_locs) { + if (ctx->cells.at(cell.first)->type == beltype) + occupancy.at(cell.second.x).at(cell.second.y)++; + // Compute ultimate extent of each chain root + if (p->chain_root.count(cell.first)) { + set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y); + } else if (!ctx->cells.at(cell.first)->constr_children.empty()) { + set_chain_ext(cell.first, cell.second.x, cell.second.y); + } + } + for (auto &cell : p->cell_locs) { + // Transfer chain extents to the actual chaines structure + ChainExtent *ce = nullptr; + if (p->chain_root.count(cell.first)) + ce = &(cr_extents.at(p->chain_root.at(cell.first)->name)); + else if (!ctx->cells.at(cell.first)->constr_children.empty()) + ce = &(cr_extents.at(cell.first)); + if (ce) { + auto &lce = chaines.at(cell.second.x).at(cell.second.y); + lce.x0 = std::min(lce.x0, ce->x0); + lce.y0 = std::min(lce.y0, ce->y0); + lce.x1 = std::max(lce.x1, ce->x1); + lce.y1 = std::max(lce.y1, ce->y1); + } + } + } + void merge_regions(LegaliserRegion &merged, LegaliserRegion &mergee) + { + // Prevent grow_region from recursing while doing this + for (int x = mergee.x0; x <= mergee.x1; x++) + for (int y = mergee.y0; y <= mergee.y1; y++) { + // log_info("%d %d\n", groups.at(x).at(y), mergee.id); + NPNR_ASSERT(groups.at(x).at(y) == mergee.id); + groups.at(x).at(y) = merged.id; + merged.cells += occ_at(x, y); + merged.bels += bels_at(x, y); + } + merged_regions.insert(mergee.id); + grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); + } + + void grow_region(LegaliserRegion &r, int x0, int y0, int x1, int y1, bool init = false) + { + // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); + if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) + return; + int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1; + r.x0 = std::min(r.x0, x0); + r.y0 = std::min(r.y0, y0); + r.x1 = std::max(r.x1, x1); + r.y1 = std::max(r.y1, y1); + + auto process_location = [&](int x, int y) { + // Merge with any overlapping regions + if (groups.at(x).at(y) != r.id) { + r.bels += bels_at(x, y); + r.cells += occ_at(x, y); + } + if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) + merge_regions(r, regions.at(groups.at(x).at(y))); + groups.at(x).at(y) = r.id; + // Grow to cover any chains + auto &chaine = chaines.at(x).at(y); + grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1); + }; + for (int x = r.x0; x < old_x0; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int x = old_x1 + 1; x <= x1; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int y = r.y0; y < old_y0; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + for (int y = old_y1 + 1; y <= r.y1; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + } + + void find_overused_regions() + { + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + // Either already in a group, or not overutilised. Ignore + if (groups.at(x).at(y) != -1 || (occ_at(x, y) <= bels_at(x, y))) + continue; + // log_info("%d %d %d\n", x, y, occ_at(x, y)); + int id = int(regions.size()); + groups.at(x).at(y) = id; + LegaliserRegion reg; + reg.id = id; + reg.x0 = reg.x1 = x; + reg.y0 = reg.y1 = y; + reg.bels = bels_at(x, y); + reg.cells = occ_at(x, y); + // Make sure we cover carries, etc + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); + + bool expanded = true; + while (expanded) { + expanded = false; + // Keep trying expansion in x and y, until we find no over-occupancy cells + // or hit grouped cells + + // First try expanding in x + if (reg.x1 < p->max_x) { + bool over_occ_x = false; + for (int y1 = reg.y0; y1 <= reg.y1; y1++) { + if (occ_at(reg.x1 + 1, y1) > bels_at(reg.x1 + 1, y1)) { + // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1), + // bels_at(reg.x1 + 1, y1)); + over_occ_x = true; + break; + } + } + if (over_occ_x) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + } + } + + if (reg.y1 < p->max_y) { + bool over_occ_y = false; + for (int x1 = reg.x0; x1 <= reg.x1; x1++) { + if (occ_at(x1, reg.y1 + 1) > bels_at(x1, reg.y1 + 1)) { + // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1), + // bels_at(x1, reg.y1 + 1)); + over_occ_y = true; + break; + } + } + if (over_occ_y) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + } + } + } + regions.push_back(reg); + } + } + + void expand_regions() + { + std::queue overu_regions; + for (auto &r : regions) { + if (!merged_regions.count(r.id) && r.overused()) + overu_regions.push(r.id); + } + while (!overu_regions.empty()) { + int rid = overu_regions.front(); + overu_regions.pop(); + if (merged_regions.count(rid)) + continue; + auto ® = regions.at(rid); + while (reg.overused()) { + bool changed = false; + if (reg.x0 > 0) { + grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.x1 < p->max_x) { + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.y0 > 0) { + grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.y1 < p->max_y) { + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + changed = true; + if (!reg.overused()) + break; + } + if (!changed) + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, + reg.y1, reg.cells, beltype.c_str(ctx)); + } + } + } + }; + typedef decltype(CellInfo::udata) cell_udata_t; cell_udata_t dont_solve = std::numeric_limits::max(); }; -- cgit v1.2.3