diff options
author | David Shah <davey1576@gmail.com> | 2019-02-25 12:46:06 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-25 12:46:06 +0000 |
commit | 031725c80eb6c2c0f922b6fa5be57c42330a8a3b (patch) | |
tree | 49215217265802745407465c41f9b493e74f47b1 | |
parent | e87fb696653262bea08caa100f0a5d4d31d2a310 (diff) | |
parent | df79d94944b4d92207be7ddedc6424b7c931f313 (diff) | |
download | nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.tar.gz nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.tar.bz2 nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.zip |
Merge pull request #242 from YosysHQ/ecp5next
ecp5: Fixes and QoR improvements
-rw-r--r-- | common/timing.cc | 9 | ||||
-rw-r--r-- | ecp5/arch.cc | 91 | ||||
-rw-r--r-- | ecp5/arch.h | 4 | ||||
-rw-r--r-- | ecp5/arch_place.cc | 74 | ||||
-rw-r--r-- | ecp5/archdefs.h | 2 | ||||
-rw-r--r-- | ecp5/globals.cc | 19 | ||||
-rw-r--r-- | ecp5/pack.cc | 67 | ||||
-rwxr-xr-x | ecp5/trellis_import.py | 10 |
8 files changed, 238 insertions, 38 deletions
diff --git a/common/timing.cc b/common/timing.cc index 64dcdf71..8ade2660 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -611,8 +611,9 @@ struct Timing continue; delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { - float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); - nc.criticality.at(i) = criticality; + float criticality = + 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); + nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality)); } nc.max_path_length = nd.max_path_length; nc.cd_worst_slack = worst_slack.at(startdomain.first); @@ -837,6 +838,10 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p auto cursor = sink_wire; delay_t delay; while (driver_wire != cursor) { +#ifdef ARCH_ECP5 + if (net->is_global) + break; +#endif auto it = net->wires.find(cursor); assert(it != net->wires.end()); auto pip = it->second.pip; diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 7de5c7aa..da0f7b1a 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -427,44 +427,90 @@ BelId Arch::getBelByLocation(Loc loc) const delay_t Arch::estimateDelay(WireId src, WireId dst) const { - auto est_location = [&](WireId w) -> std::pair<int16_t, int16_t> { - if (w.location.x == 0 && w.location.y == 0) { - // Global wires - const auto &wire = locInfo(w)->wire_data[w.index]; - // Use location of first downhill bel or pip, if available - if (wire.num_bel_pins > 0) { - return std::make_pair(wire.bel_pins[0].rel_bel_loc.x, wire.bel_pins[0].rel_bel_loc.y); - } else if (wire.num_downhill > 0) { - return std::make_pair(wire.pips_downhill[0].rel_loc.x, wire.pips_downhill[0].rel_loc.y); - } else if (wire.num_uphill > 0) { - return std::make_pair(wire.pips_uphill[0].rel_loc.x, wire.pips_uphill[0].rel_loc.y); - } else { - return std::make_pair<int16_t, int16_t>(0, 0); - } + WireId cursor = dst; + + int num_uh = locInfo(dst)->wire_data[dst.index].num_uphill; + if (num_uh < 6) { + for (auto uh : getPipsUphill(dst)) { + if (getPipSrcWire(uh) == src) + return getPipDelay(uh).maxDelay(); + } + } + + auto est_location = [&](WireId w) -> std::pair<int, int> { + const auto &wire = locInfo(w)->wire_data[w.index]; + if (wire.num_bel_pins > 0) { + return std::make_pair(w.location.x + wire.bel_pins[0].rel_bel_loc.x, + w.location.y + wire.bel_pins[0].rel_bel_loc.y); + } else if (wire.num_downhill > 0) { + return std::make_pair(w.location.x + wire.pips_downhill[0].rel_loc.x, + w.location.y + wire.pips_downhill[0].rel_loc.y); + } else if (wire.num_uphill > 0) { + return std::make_pair(w.location.x + wire.pips_uphill[0].rel_loc.x, + w.location.y + wire.pips_uphill[0].rel_loc.y); } else { - return std::make_pair(w.location.x, w.location.y); + return std::make_pair(int(w.location.x), int(w.location.y)); } }; auto src_loc = est_location(src), dst_loc = est_location(dst); - return (240 - 20 * args.speed) * (abs(src_loc.first - dst_loc.first) + abs(src_loc.second - dst_loc.second)); + int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); + return (130 - 25 * args.speed) * + (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const { const auto &driver = net_info->driver; + if ((driver.port == id_FCO && sink.port == id_FCI) || sink.port == id_FXA || sink.port == id_FXB) + return 0; auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); - return (240 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); + // Encourage use of direct interconnect + if (driver_loc.x == sink_loc.x && driver_loc.y == sink_loc.y) { + if ((sink.port == id_A0 || sink.port == id_A1) && (driver.port == id_F1) && + (driver_loc.z == 2 || driver_loc.z == 3)) + return 0; + if ((sink.port == id_B0 || sink.port == id_B1) && (driver.port == id_F1) && + (driver_loc.z == 0 || driver_loc.z == 1)) + return 0; + if ((sink.port == id_C0 || sink.port == id_C1) && (driver.port == id_F0) && + (driver_loc.z == 2 || driver_loc.z == 3)) + return 0; + if ((sink.port == id_D0 || sink.port == id_D1) && (driver.port == id_F0) && + (driver_loc.z == 0 || driver_loc.z == 1)) + return 0; + } + + int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); + return (130 - 25 * args.speed) * + (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } -bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { return false; } +bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const +{ + if (net_info->driver.port == id_FCO && sink.port == id_FCI) { + budget = 0; + return true; + } else if (sink.port == id_FXA || sink.port == id_FXB) { + budget = 0; + return true; + } else { + return false; + } +} // ----------------------------------------------------------------------- -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +bool Arch::place() +{ + bool result = placer1(getCtx(), Placer1Cfg(getCtx())); + if (result) + permute_luts(); + return result; +} bool Arch::route() { @@ -602,7 +648,7 @@ bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort // Data for -8 grade if (cell->type == id_TRELLIS_SLICE) { - bool has_carry = str_or_default(cell->params, id("MODE"), "LOGIC") == "CCU2"; + bool has_carry = cell->sliceInfo.is_carry; if (fromPort == id_A0 || fromPort == id_B0 || fromPort == id_C0 || fromPort == id_D0 || fromPort == id_A1 || fromPort == id_B1 || fromPort == id_C1 || fromPort == id_D1 || fromPort == id_M0 || fromPort == id_M1 || fromPort == id_FXA || fromPort == id_FXB || fromPort == id_FCI) { @@ -639,7 +685,7 @@ TimingPortClass Arch::getPortTimingClass(const CellInfo *cell, IdString port, in auto disconnected = [cell](IdString p) { return !cell->ports.count(p) || cell->ports.at(p).net == nullptr; }; clockInfoCount = 0; if (cell->type == id_TRELLIS_SLICE) { - int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); + int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1; if (port == id_CLK || port == id_WCK) return TMG_CLOCK_INPUT; if (port == id_A0 || port == id_A1 || port == id_B0 || port == id_B1 || port == id_C0 || port == id_C1 || @@ -782,8 +828,7 @@ TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port info.hold = getDelayFromNS(0); info.clockToQ = getDelayFromNS(0); if (cell->type == id_TRELLIS_SLICE) { - int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); - + int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1; if (port == id_WD0 || port == id_WD1 || port == id_WAD0 || port == id_WAD1 || port == id_WAD2 || port == id_WAD3 || port == id_WRE) { info.edge = RISING_EDGE; diff --git a/ecp5/arch.h b/ecp5/arch.h index 6a2f2bf5..59c8638d 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -918,7 +918,7 @@ struct Arch : BaseCtx delay_t estimateDelay(WireId src, WireId dst) const; delay_t predictDelay(const NetInfo *net_info, const PortRef &sink) const; delay_t getDelayEpsilon() const { return 20; } - delay_t getRipupDelayPenalty() const { return 200; } + delay_t getRipupDelayPenalty() const { return 250; } float getDelayNS(delay_t v) const { return v * 0.001; } DelayInfo getDelayFromNS(float ns) const { @@ -971,6 +971,8 @@ struct Arch : BaseCtx void assignArchInfo(); + void permute_luts(); + std::vector<std::pair<std::string, std::string>> getTilesAtLocation(int row, int col); std::string getTileByTypeAndLocation(int row, int col, std::string type) const { diff --git a/ecp5/arch_place.cc b/ecp5/arch_place.cc index ff70bb5a..e5c9b31f 100644 --- a/ecp5/arch_place.cc +++ b/ecp5/arch_place.cc @@ -18,8 +18,10 @@ */ #include "cells.h" +#include "design_utils.h" #include "log.h" #include "nextpnr.h" +#include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -115,4 +117,76 @@ bool Arch::isValidBelForCell(CellInfo *cell, BelId bel) const } } +void Arch::permute_luts() +{ + NetCriticalityMap nc; + get_criticalities(getCtx(), &nc); + + std::unordered_map<PortInfo *, size_t> port_to_user; + for (auto net : sorted(nets)) { + NetInfo *ni = net.second; + for (size_t i = 0; i < ni->users.size(); i++) { + auto &usr = ni->users.at(i); + port_to_user[&(usr.cell->ports.at(usr.port))] = i; + } + } + + auto proc_lut = [&](CellInfo *ci, int lut) { + std::vector<IdString> port_names; + for (int i = 0; i < 4; i++) + port_names.push_back(id(std::string("ABCD").substr(i, 1) + std::to_string(lut))); + + std::vector<std::pair<float, int>> inputs; + std::vector<NetInfo *> orig_nets; + + for (int i = 0; i < 4; i++) { + auto &port = ci->ports.at(port_names.at(i)); + float crit = 0; + if (port.net != nullptr && nc.count(port.net->name)) { + auto &n = nc.at(port.net->name); + size_t usr = port_to_user.at(&port); + if (usr < n.criticality.size()) + crit = n.criticality.at(usr); + } + orig_nets.push_back(port.net); + inputs.emplace_back(crit, i); + } + // Least critical first (A input is slowest) + std::sort(inputs.begin(), inputs.end()); + for (int i = 0; i < 4; i++) { + IdString p = port_names.at(i); + // log_info("%s %s %f\n", p.c_str(ctx), port_names.at(inputs.at(i).second).c_str(ctx), inputs.at(i).first); + disconnect_port(getCtx(), ci, p); + ci->ports.at(p).net = nullptr; + if (orig_nets.at(inputs.at(i).second) != nullptr) { + connect_port(getCtx(), orig_nets.at(inputs.at(i).second), ci, p); + ci->params[id(p.str(this) + "MUX")] = p.str(this); + } else { + ci->params[id(p.str(this) + "MUX")] = "1"; + } + } + // Rewrite function + int old_init = int_or_default(ci->params, id("LUT" + std::to_string(lut) + "_INITVAL"), 0); + int new_init = 0; + for (int i = 0; i < 16; i++) { + int old_index = 0; + for (int k = 0; k < 4; k++) { + if (i & (1 << k)) + old_index |= (1 << inputs.at(k).second); + } + if (old_init & (1 << old_index)) + new_init |= (1 << i); + } + ci->params[id("LUT" + std::to_string(lut) + "_INITVAL")] = std::to_string(new_init); + }; + + for (auto cell : sorted(cells)) { + CellInfo *ci = cell.second; + if (ci->type == id_TRELLIS_SLICE && str_or_default(ci->params, id("MODE"), "LOGIC") == "LOGIC") { + proc_lut(ci, 0); + proc_lut(ci, 1); + } + } +} + NEXTPNR_NAMESPACE_END diff --git a/ecp5/archdefs.h b/ecp5/archdefs.h index bfc5769b..d7ea0a8e 100644 --- a/ecp5/archdefs.h +++ b/ecp5/archdefs.h @@ -159,7 +159,9 @@ struct ArchCellInfo { bool using_dff; bool has_l6mux; + bool is_carry; IdString clk_sig, lsr_sig, clkmux, lsrmux, srmode; + int sd0, sd1; } sliceInfo; }; diff --git a/ecp5/globals.cc b/ecp5/globals.cc index 49947b20..fae2c683 100644 --- a/ecp5/globals.cc +++ b/ecp5/globals.cc @@ -448,6 +448,8 @@ class Ecp5GlobalRouter if (i < 8) fab_globals.insert(i); } + std::vector<std::pair<PortRef *, int>> toroute; + std::unordered_map<int, NetInfo *> clocks; for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; if (ci->type == id_DCCA) { @@ -472,15 +474,18 @@ class Ecp5GlobalRouter NPNR_ASSERT(routed); // WCK must have routing priority - auto sorted_users = clock->users; - std::sort(sorted_users.begin(), sorted_users.end(), [this](const PortRef &a, const PortRef &b) { - return global_route_priority(a) < global_route_priority(b); - }); - for (const auto &user : sorted_users) { - route_logic_tile_global(clock, glbid, user); - } + for (auto &user : clock->users) + toroute.emplace_back(&user, glbid); + clocks[glbid] = clock; } } + std::sort(toroute.begin(), toroute.end(), + [this](const std::pair<PortRef *, int> &a, const std::pair<PortRef *, int> &b) { + return global_route_priority(*a.first) < global_route_priority(*b.first); + }); + for (const auto &user : toroute) { + route_logic_tile_global(clocks.at(user.second), user.second, *user.first); + } } }; void promote_ecp5_globals(Context *ctx) { Ecp5GlobalRouter(ctx).promote_globals(); } diff --git a/ecp5/pack.cc b/ecp5/pack.cc index 64682fd2..1b07c2ae 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -27,6 +27,7 @@ #include "design_utils.h" #include "globals.h" #include "log.h" +#include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -164,6 +165,7 @@ class Ecp5Packer CellInfo *ci = cell.second; if (is_lut(ctx, ci) && procdLuts.find(cell.first) == procdLuts.end()) { NetInfo *znet = ci->ports.at(ctx->id("Z")).net; + std::vector<NetInfo *> inpnets; if (znet != nullptr) { for (auto user : znet->users) { if (is_lut(ctx, user.cell) && user.cell != ci && @@ -228,12 +230,67 @@ class Ecp5Packer } } } + + // Pack LUTs feeding the same CCU2, RAM or DFF into a SLICE + if (znet != nullptr && znet->users.size() < 10) { + for (auto user : znet->users) { + if (is_lc(ctx, user.cell) || user.cell->type == ctx->id("DP16KD") || is_ff(ctx, user.cell)) { + for (auto port : user.cell->ports) { + if (port.second.type != PORT_IN || port.second.net == nullptr || + port.second.net == znet) + continue; + if (port.second.net->users.size() > 10) + continue; + CellInfo *drv = port.second.net->driver.cell; + if (drv == nullptr) + continue; + if (is_lut(ctx, drv) && !procdLuts.count(drv->name) && + can_pack_lutff(ci->name, drv->name)) { + procdLuts.insert(ci->name); + procdLuts.insert(drv->name); + lutPairs[ci->name] = drv->name; + goto paired_inlut; + } + } + } + } + } + + // Pack LUTs sharing an input with a simple fanout-based heuristic + for (const char *inp : {"A", "B", "C", "D"}) { + NetInfo *innet = ci->ports.at(ctx->id(inp)).net; + if (innet != nullptr && innet->users.size() < 5 && innet->users.size() > 1) + inpnets.push_back(innet); + } + std::sort(inpnets.begin(), inpnets.end(), + [&](const NetInfo *a, const NetInfo *b) { return a->users.size() < b->users.size(); }); + for (auto inet : inpnets) { + for (auto &user : inet->users) { + if (user.cell == nullptr || user.cell == ci || !is_lut(ctx, user.cell)) + continue; + if (procdLuts.count(user.cell->name)) + continue; + if (can_pack_lutff(ci->name, user.cell->name)) { + procdLuts.insert(ci->name); + procdLuts.insert(user.cell->name); + lutPairs[ci->name] = user.cell->name; + goto paired_inlut; + } + } + } + if (false) { paired_inlut: continue; } } } + if (ctx->debug) { + log_info("Singleton LUTs (packer QoR debug): \n"); + for (auto cell : sorted(ctx->cells)) + if (is_lut(ctx, cell.second) && !procdLuts.count(cell.first)) + log_info(" %s\n", cell.first.c_str(ctx)); + } } // Return true if an port is a top level port that provides its own IOBUF @@ -940,11 +997,11 @@ class Ecp5Packer if (is_lut(ctx, ci)) { std::unique_ptr<CellInfo> slice = create_ecp5_cell(ctx, ctx->id("TRELLIS_SLICE"), ci->name.str(ctx) + "_SLICE"); - lut_to_slice(ctx, ci, slice.get(), 0); + lut_to_slice(ctx, ci, slice.get(), 1); auto ff = lutffPairs.find(ci->name); if (ff != lutffPairs.end()) { - ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 0, true); + ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 1, true); packed_cells.insert(ff->second); fflutPairs.erase(ff->second); lutffPairs.erase(ci->name); @@ -1825,7 +1882,8 @@ class Ecp5Packer } iol->params[ctx->id("DELAY.DEL_VALUE")] = std::to_string(lookup_delay(str_or_default(ci->params, ctx->id("DEL_MODE"), "USER_DEFINED"))); - if (ci->params.count(ctx->id("DEL_VALUE")) && ci->params.at(ctx->id("DEL_VALUE")) != "DELAY0") + if (ci->params.count(ctx->id("DEL_VALUE")) && + ci->params.at(ctx->id("DEL_VALUE")).substr(0, 5) != "DELAY") iol->params[ctx->id("DELAY.DEL_VALUE")] = ci->params.at(ctx->id("DEL_VALUE")); if (ci->ports.count(id_LOADN)) replace_port(ci, id_LOADN, iol, id_LOADN); @@ -2388,6 +2446,9 @@ void Arch::assignArchInfo() ci->sliceInfo.clkmux = id(str_or_default(ci->params, id_CLKMUX, "CLK")); ci->sliceInfo.lsrmux = id(str_or_default(ci->params, id_LSRMUX, "LSR")); ci->sliceInfo.srmode = id(str_or_default(ci->params, id_SRMODE, "LSR_OVER_CE")); + ci->sliceInfo.is_carry = str_or_default(ci->params, id("MODE"), "LOGIC") == "CCU2"; + ci->sliceInfo.sd0 = int_or_default(ci->params, id("REG0_SD"), 0); + ci->sliceInfo.sd1 = int_or_default(ci->params, id("REG1_SD"), 0); ci->sliceInfo.has_l6mux = false; if (ci->ports.count(id_FXA) && ci->ports[id_FXA].net != nullptr && ci->ports[id_FXA].net->driver.port == id_OFX0) diff --git a/ecp5/trellis_import.py b/ecp5/trellis_import.py index 6acc32c5..610bd331 100755 --- a/ecp5/trellis_import.py +++ b/ecp5/trellis_import.py @@ -153,7 +153,7 @@ speed_grade_names = ["6", "7", "8", "8_5G"] speed_grade_cells = {} speed_grade_pips = {} -pip_class_to_idx = {"default": 0} +pip_class_to_idx = {"default": 0, "zero": 1} timing_port_xform = { "RAD0": "D0", @@ -199,7 +199,7 @@ def process_timing_data(): pip_class_delays = [] for i in range(len(pip_class_to_idx)): pip_class_delays.append((50, 50, 0, 0)) - + pip_class_delays[pip_class_to_idx["zero"]] = (0, 0, 0, 0) with open(timing_dbs.interconnect_db_path("ECP5", grade)) as f: interconn_data = json.load(f) for pipclass, pipdata in sorted(interconn_data.items()): @@ -219,6 +219,12 @@ def process_timing_data(): def get_pip_class(wire_from, wire_to): + + if "FCO" in wire_from or "FCI" in wire_to: + return pip_class_to_idx["zero"] + if "F5" in wire_from or "FX" in wire_from or "FXA" in wire_to or "FXB" in wire_to: + return pip_class_to_idx["zero"] + class_name = pip_classes.get_pip_class(wire_from, wire_to) if class_name is None or class_name not in pip_class_to_idx: class_name = "default" |