From 998d055ea7f8bcc423d2aa2d75f5f27b6368666e Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 13 Dec 2018 13:40:50 +0000 Subject: ecp5: Speed up timing analysis Signed-off-by: David Shah --- ecp5/arch.cc | 7 +++---- ecp5/archdefs.h | 2 ++ ecp5/pack.cc | 3 +++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 7de5c7aa..0d6b6a55 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -602,7 +602,7 @@ bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort // Data for -8 grade if (cell->type == id_TRELLIS_SLICE) { - bool has_carry = str_or_default(cell->params, id("MODE"), "LOGIC") == "CCU2"; + bool has_carry = cell->sliceInfo.is_carry; if (fromPort == id_A0 || fromPort == id_B0 || fromPort == id_C0 || fromPort == id_D0 || fromPort == id_A1 || fromPort == id_B1 || fromPort == id_C1 || fromPort == id_D1 || fromPort == id_M0 || fromPort == id_M1 || fromPort == id_FXA || fromPort == id_FXB || fromPort == id_FCI) { @@ -639,7 +639,7 @@ TimingPortClass Arch::getPortTimingClass(const CellInfo *cell, IdString port, in auto disconnected = [cell](IdString p) { return !cell->ports.count(p) || cell->ports.at(p).net == nullptr; }; clockInfoCount = 0; if (cell->type == id_TRELLIS_SLICE) { - int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); + int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1; if (port == id_CLK || port == id_WCK) return TMG_CLOCK_INPUT; if (port == id_A0 || port == id_A1 || port == id_B0 || port == id_B1 || port == id_C0 || port == id_C1 || @@ -782,8 +782,7 @@ TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port info.hold = getDelayFromNS(0); info.clockToQ = getDelayFromNS(0); if (cell->type == id_TRELLIS_SLICE) { - int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); - + int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1; if (port == id_WD0 || port == id_WD1 || port == id_WAD0 || port == id_WAD1 || port == id_WAD2 || port == id_WAD3 || port == id_WRE) { info.edge = RISING_EDGE; diff --git a/ecp5/archdefs.h b/ecp5/archdefs.h index bfc5769b..d7ea0a8e 100644 --- a/ecp5/archdefs.h +++ b/ecp5/archdefs.h @@ -159,7 +159,9 @@ struct ArchCellInfo { bool using_dff; bool has_l6mux; + bool is_carry; IdString clk_sig, lsr_sig, clkmux, lsrmux, srmode; + int sd0, sd1; } sliceInfo; }; diff --git a/ecp5/pack.cc b/ecp5/pack.cc index 64682fd2..db8c4002 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -2388,6 +2388,9 @@ void Arch::assignArchInfo() ci->sliceInfo.clkmux = id(str_or_default(ci->params, id_CLKMUX, "CLK")); ci->sliceInfo.lsrmux = id(str_or_default(ci->params, id_LSRMUX, "LSR")); ci->sliceInfo.srmode = id(str_or_default(ci->params, id_SRMODE, "LSR_OVER_CE")); + ci->sliceInfo.is_carry = str_or_default(ci->params, id("MODE"), "LOGIC") == "CCU2"; + ci->sliceInfo.sd0 = int_or_default(ci->params, id("REG0_SD"), 0); + ci->sliceInfo.sd1 = int_or_default(ci->params, id("REG1_SD"), 0); ci->sliceInfo.has_l6mux = false; if (ci->ports.count(id_FXA) && ci->ports[id_FXA].net != nullptr && ci->ports[id_FXA].net->driver.port == id_OFX0) -- cgit v1.2.3 From af3ff143be312b0f73289955bd513925f2bb7c4f Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 8 Jan 2019 10:52:03 +0000 Subject: ecp5: Improve delay model Signed-off-by: David Shah --- ecp5/arch.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 0d6b6a55..1abb8af0 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -448,16 +448,17 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const auto src_loc = est_location(src), dst_loc = est_location(dst); - return (240 - 20 * args.speed) * (abs(src_loc.first - dst_loc.first) + abs(src_loc.second - dst_loc.second)); + return (110 - 10 * args.speed) + (200 - 20 * args.speed) * (abs(src_loc.first - dst_loc.first) + abs(src_loc.second - dst_loc.second)); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const { const auto &driver = net_info->driver; + if (driver.port == id_FCO && sink.port == id_FCI) + return 0; auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); - - return (240 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); + return (110 - 10 * args.speed) + (200 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); } bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { return false; } -- cgit v1.2.3 From f5b11ce075544d00ccafaf4363d099b6f1806335 Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 8 Jan 2019 13:06:02 +0000 Subject: ecp5: Implement budget overrides for carry chains and SLICE muxes Signed-off-by: David Shah --- ecp5/arch.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 1abb8af0..ad207d14 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -454,14 +454,24 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const { const auto &driver = net_info->driver; - if (driver.port == id_FCO && sink.port == id_FCI) + if ((driver.port == id_FCO && sink.port == id_FCI) || sink.port == id_FXA || sink.port == id_FXB) return 0; auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); return (110 - 10 * args.speed) + (200 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); } -bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { return false; } +bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { + if (net_info->driver.port == id_FCO && sink.port == id_FCI) { + return true; + budget = 0; + } else if (sink.port == id_FXA || sink.port == id_FXB) { + return true; + budget = 0; + } else { + return false; + } +} // ----------------------------------------------------------------------- -- cgit v1.2.3 From 55b0b60d9d58961bfefea66fcc197b399424d9d6 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 7 Feb 2019 19:19:15 +0000 Subject: ecp5: Router performance improvements Signed-off-by: David Shah --- common/timing.cc | 4 ++++ ecp5/arch.cc | 21 +++++++++++++++++---- ecp5/arch.h | 2 +- ecp5/trellis_import.py | 10 ++++++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index 64dcdf71..db38b11b 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -837,6 +837,10 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p auto cursor = sink_wire; delay_t delay; while (driver_wire != cursor) { +#ifdef ARCH_ECP5 + if (net->is_global) + break; +#endif auto it = net->wires.find(cursor); assert(it != net->wires.end()); auto pip = it->second.pip; diff --git a/ecp5/arch.cc b/ecp5/arch.cc index ad207d14..2bce0b01 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -427,6 +427,16 @@ BelId Arch::getBelByLocation(Loc loc) const delay_t Arch::estimateDelay(WireId src, WireId dst) const { + WireId cursor = dst; + + int num_uh = locInfo(dst)->wire_data[dst.index].num_uphill; + if (num_uh < 6) { + for (auto uh : getPipsUphill(dst)) { + if (getPipSrcWire(uh) == src) + return getPipDelay(uh).maxDelay(); + } + } + auto est_location = [&](WireId w) -> std::pair { if (w.location.x == 0 && w.location.y == 0) { // Global wires @@ -448,7 +458,8 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const auto src_loc = est_location(src), dst_loc = est_location(dst); - return (110 - 10 * args.speed) + (200 - 20 * args.speed) * (abs(src_loc.first - dst_loc.first) + abs(src_loc.second - dst_loc.second)); + int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); + return (130 - 13 * args.speed) * (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -458,16 +469,18 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const return 0; auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); - return (110 - 10 * args.speed) + (200 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); + + int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); + return (130 - 13 * args.speed) * (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { if (net_info->driver.port == id_FCO && sink.port == id_FCI) { - return true; budget = 0; - } else if (sink.port == id_FXA || sink.port == id_FXB) { return true; + } else if (sink.port == id_FXA || sink.port == id_FXB) { budget = 0; + return true; } else { return false; } diff --git a/ecp5/arch.h b/ecp5/arch.h index 6a2f2bf5..ee412cca 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -918,7 +918,7 @@ struct Arch : BaseCtx delay_t estimateDelay(WireId src, WireId dst) const; delay_t predictDelay(const NetInfo *net_info, const PortRef &sink) const; delay_t getDelayEpsilon() const { return 20; } - delay_t getRipupDelayPenalty() const { return 200; } + delay_t getRipupDelayPenalty() const { return 500; } float getDelayNS(delay_t v) const { return v * 0.001; } DelayInfo getDelayFromNS(float ns) const { diff --git a/ecp5/trellis_import.py b/ecp5/trellis_import.py index 6acc32c5..610bd331 100755 --- a/ecp5/trellis_import.py +++ b/ecp5/trellis_import.py @@ -153,7 +153,7 @@ speed_grade_names = ["6", "7", "8", "8_5G"] speed_grade_cells = {} speed_grade_pips = {} -pip_class_to_idx = {"default": 0} +pip_class_to_idx = {"default": 0, "zero": 1} timing_port_xform = { "RAD0": "D0", @@ -199,7 +199,7 @@ def process_timing_data(): pip_class_delays = [] for i in range(len(pip_class_to_idx)): pip_class_delays.append((50, 50, 0, 0)) - + pip_class_delays[pip_class_to_idx["zero"]] = (0, 0, 0, 0) with open(timing_dbs.interconnect_db_path("ECP5", grade)) as f: interconn_data = json.load(f) for pipclass, pipdata in sorted(interconn_data.items()): @@ -219,6 +219,12 @@ def process_timing_data(): def get_pip_class(wire_from, wire_to): + + if "FCO" in wire_from or "FCI" in wire_to: + return pip_class_to_idx["zero"] + if "F5" in wire_from or "FX" in wire_from or "FXA" in wire_to or "FXB" in wire_to: + return pip_class_to_idx["zero"] + class_name = pip_classes.get_pip_class(wire_from, wire_to) if class_name is None or class_name not in pip_class_to_idx: class_name = "default" -- cgit v1.2.3 From 4ec2bd1e5deebf738e35ecf594a958cb0166f4af Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 10:54:24 +0000 Subject: ecp5: Fix global clock routing with multiclock DPRAM Signed-off-by: David Shah --- ecp5/arch.cc | 9 ++++++--- ecp5/globals.cc | 19 ++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 2bce0b01..bec9278d 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -459,7 +459,8 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const auto src_loc = est_location(src), dst_loc = est_location(dst); int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); - return (130 - 13 * args.speed) * (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + return (130 - 13 * args.speed) * + (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -471,10 +472,12 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const auto sink_loc = getBelLocation(sink.cell->bel); int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); - return (130 - 13 * args.speed) * (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + return (130 - 13 * args.speed) * + (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } -bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { +bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const +{ if (net_info->driver.port == id_FCO && sink.port == id_FCI) { budget = 0; return true; diff --git a/ecp5/globals.cc b/ecp5/globals.cc index 49947b20..fae2c683 100644 --- a/ecp5/globals.cc +++ b/ecp5/globals.cc @@ -448,6 +448,8 @@ class Ecp5GlobalRouter if (i < 8) fab_globals.insert(i); } + std::vector> toroute; + std::unordered_map clocks; for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; if (ci->type == id_DCCA) { @@ -472,15 +474,18 @@ class Ecp5GlobalRouter NPNR_ASSERT(routed); // WCK must have routing priority - auto sorted_users = clock->users; - std::sort(sorted_users.begin(), sorted_users.end(), [this](const PortRef &a, const PortRef &b) { - return global_route_priority(a) < global_route_priority(b); - }); - for (const auto &user : sorted_users) { - route_logic_tile_global(clock, glbid, user); - } + for (auto &user : clock->users) + toroute.emplace_back(&user, glbid); + clocks[glbid] = clock; } } + std::sort(toroute.begin(), toroute.end(), + [this](const std::pair &a, const std::pair &b) { + return global_route_priority(*a.first) < global_route_priority(*b.first); + }); + for (const auto &user : toroute) { + route_logic_tile_global(clocks.at(user.second), user.second, *user.first); + } } }; void promote_ecp5_globals(Context *ctx) { Ecp5GlobalRouter(ctx).promote_globals(); } -- cgit v1.2.3 From 89de4caf6c1c97bd22cdd79abe24788ec3bb3665 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:03:59 +0000 Subject: timing: Fix negative slack overflow issue Signed-off-by: David Shah --- common/timing.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/timing.cc b/common/timing.cc index db38b11b..8ade2660 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -611,8 +611,9 @@ struct Timing continue; delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { - float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); - nc.criticality.at(i) = criticality; + float criticality = + 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); + nc.criticality.at(i) = std::min(1.0, std::max(0.0, criticality)); } nc.max_path_length = nd.max_path_length; nc.cd_worst_slack = worst_slack.at(startdomain.first); -- cgit v1.2.3 From f363dd2d3c8d00b4d237208d394ed185203a6890 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:04:13 +0000 Subject: ecp5: Delay tuning Signed-off-by: David Shah --- ecp5/arch.cc | 49 +++++++++++++++++++++++++++++++------------------ ecp5/arch.h | 2 +- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index bec9278d..cc529df8 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -437,30 +437,27 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const } } - auto est_location = [&](WireId w) -> std::pair { - if (w.location.x == 0 && w.location.y == 0) { - // Global wires - const auto &wire = locInfo(w)->wire_data[w.index]; - // Use location of first downhill bel or pip, if available - if (wire.num_bel_pins > 0) { - return std::make_pair(wire.bel_pins[0].rel_bel_loc.x, wire.bel_pins[0].rel_bel_loc.y); - } else if (wire.num_downhill > 0) { - return std::make_pair(wire.pips_downhill[0].rel_loc.x, wire.pips_downhill[0].rel_loc.y); - } else if (wire.num_uphill > 0) { - return std::make_pair(wire.pips_uphill[0].rel_loc.x, wire.pips_uphill[0].rel_loc.y); - } else { - return std::make_pair(0, 0); - } + auto est_location = [&](WireId w) -> std::pair { + const auto &wire = locInfo(w)->wire_data[w.index]; + if (wire.num_bel_pins > 0) { + return std::make_pair(w.location.x + wire.bel_pins[0].rel_bel_loc.x, + w.location.y + wire.bel_pins[0].rel_bel_loc.y); + } else if (wire.num_downhill > 0) { + return std::make_pair(w.location.x + wire.pips_downhill[0].rel_loc.x, + w.location.y + wire.pips_downhill[0].rel_loc.y); + } else if (wire.num_uphill > 0) { + return std::make_pair(w.location.x + wire.pips_uphill[0].rel_loc.x, + w.location.y + wire.pips_uphill[0].rel_loc.y); } else { - return std::make_pair(w.location.x, w.location.y); + return std::make_pair(int(w.location.x), int(w.location.y)); } }; auto src_loc = est_location(src), dst_loc = est_location(dst); int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); - return (130 - 13 * args.speed) * - (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + return (130 - 25 * args.speed) * + (8 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -471,8 +468,24 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); + // Encourage use of direct interconnect + if (driver_loc.x == sink_loc.x && driver_loc.y == sink_loc.y) { + if ((sink.port == id_A0 || sink.port == id_A1) && (driver.port == id_F1) && + (driver_loc.z == 2 || driver_loc.z == 3)) + return 0; + if ((sink.port == id_B0 || sink.port == id_B1) && (driver.port == id_F1) && + (driver_loc.z == 0 || driver_loc.z == 1)) + return 0; + if ((sink.port == id_C0 || sink.port == id_C1) && (driver.port == id_F0) && + (driver_loc.z == 2 || driver_loc.z == 3)) + return 0; + if ((sink.port == id_D0 || sink.port == id_D1) && (driver.port == id_F0) && + (driver_loc.z == 0 || driver_loc.z == 1)) + return 0; + } + int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); - return (130 - 13 * args.speed) * + return (130 - 25 * args.speed) * (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } diff --git a/ecp5/arch.h b/ecp5/arch.h index ee412cca..992bdb94 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -918,7 +918,7 @@ struct Arch : BaseCtx delay_t estimateDelay(WireId src, WireId dst) const; delay_t predictDelay(const NetInfo *net_info, const PortRef &sink) const; delay_t getDelayEpsilon() const { return 20; } - delay_t getRipupDelayPenalty() const { return 500; } + delay_t getRipupDelayPenalty() const { return 250; } float getDelayNS(delay_t v) const { return v * 0.001; } DelayInfo getDelayFromNS(float ns) const { -- cgit v1.2.3 From a0fa16439942d15e9be745ec074fc1ba3a2a7c95 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:06:33 +0000 Subject: ecp5: Add criticality-based LUT permutation Signed-off-by: David Shah --- ecp5/arch.cc | 12 ++++++++- ecp5/arch.h | 2 ++ ecp5/arch_place.cc | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ ecp5/pack.cc | 1 + 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index cc529df8..fdc9c8fc 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -486,7 +486,7 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); return (130 - 25 * args.speed) * - (4 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const @@ -504,7 +504,17 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- +<<<<<<< HEAD bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +======= +bool Arch::place() +{ + bool result = placer_heap(getCtx()); + if (result) + permute_luts(); + return result; +} +>>>>>>> 136e030... lut permutation bool Arch::route() { diff --git a/ecp5/arch.h b/ecp5/arch.h index 992bdb94..59c8638d 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -971,6 +971,8 @@ struct Arch : BaseCtx void assignArchInfo(); + void permute_luts(); + std::vector> getTilesAtLocation(int row, int col); std::string getTileByTypeAndLocation(int row, int col, std::string type) const { diff --git a/ecp5/arch_place.cc b/ecp5/arch_place.cc index ff70bb5a..e5c9b31f 100644 --- a/ecp5/arch_place.cc +++ b/ecp5/arch_place.cc @@ -18,8 +18,10 @@ */ #include "cells.h" +#include "design_utils.h" #include "log.h" #include "nextpnr.h" +#include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -115,4 +117,76 @@ bool Arch::isValidBelForCell(CellInfo *cell, BelId bel) const } } +void Arch::permute_luts() +{ + NetCriticalityMap nc; + get_criticalities(getCtx(), &nc); + + std::unordered_map port_to_user; + for (auto net : sorted(nets)) { + NetInfo *ni = net.second; + for (size_t i = 0; i < ni->users.size(); i++) { + auto &usr = ni->users.at(i); + port_to_user[&(usr.cell->ports.at(usr.port))] = i; + } + } + + auto proc_lut = [&](CellInfo *ci, int lut) { + std::vector port_names; + for (int i = 0; i < 4; i++) + port_names.push_back(id(std::string("ABCD").substr(i, 1) + std::to_string(lut))); + + std::vector> inputs; + std::vector orig_nets; + + for (int i = 0; i < 4; i++) { + auto &port = ci->ports.at(port_names.at(i)); + float crit = 0; + if (port.net != nullptr && nc.count(port.net->name)) { + auto &n = nc.at(port.net->name); + size_t usr = port_to_user.at(&port); + if (usr < n.criticality.size()) + crit = n.criticality.at(usr); + } + orig_nets.push_back(port.net); + inputs.emplace_back(crit, i); + } + // Least critical first (A input is slowest) + std::sort(inputs.begin(), inputs.end()); + for (int i = 0; i < 4; i++) { + IdString p = port_names.at(i); + // log_info("%s %s %f\n", p.c_str(ctx), port_names.at(inputs.at(i).second).c_str(ctx), inputs.at(i).first); + disconnect_port(getCtx(), ci, p); + ci->ports.at(p).net = nullptr; + if (orig_nets.at(inputs.at(i).second) != nullptr) { + connect_port(getCtx(), orig_nets.at(inputs.at(i).second), ci, p); + ci->params[id(p.str(this) + "MUX")] = p.str(this); + } else { + ci->params[id(p.str(this) + "MUX")] = "1"; + } + } + // Rewrite function + int old_init = int_or_default(ci->params, id("LUT" + std::to_string(lut) + "_INITVAL"), 0); + int new_init = 0; + for (int i = 0; i < 16; i++) { + int old_index = 0; + for (int k = 0; k < 4; k++) { + if (i & (1 << k)) + old_index |= (1 << inputs.at(k).second); + } + if (old_init & (1 << old_index)) + new_init |= (1 << i); + } + ci->params[id("LUT" + std::to_string(lut) + "_INITVAL")] = std::to_string(new_init); + }; + + for (auto cell : sorted(cells)) { + CellInfo *ci = cell.second; + if (ci->type == id_TRELLIS_SLICE && str_or_default(ci->params, id("MODE"), "LOGIC") == "LOGIC") { + proc_lut(ci, 0); + proc_lut(ci, 1); + } + } +} + NEXTPNR_NAMESPACE_END diff --git a/ecp5/pack.cc b/ecp5/pack.cc index db8c4002..b05aec71 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -27,6 +27,7 @@ #include "design_utils.h" #include "globals.h" #include "log.h" +#include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN -- cgit v1.2.3 From 95a85c8ea76cdd0a1c5824200451569366c9eb8c Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:07:21 +0000 Subject: ecp5: Improve packing density Signed-off-by: David Shah --- ecp5/arch.cc | 2 +- ecp5/pack.cc | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index fdc9c8fc..cd5fa0cb 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -457,7 +457,7 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); return (130 - 25 * args.speed) * - (8 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const diff --git a/ecp5/pack.cc b/ecp5/pack.cc index b05aec71..9f987f35 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -165,6 +165,7 @@ class Ecp5Packer CellInfo *ci = cell.second; if (is_lut(ctx, ci) && procdLuts.find(cell.first) == procdLuts.end()) { NetInfo *znet = ci->ports.at(ctx->id("Z")).net; + std::vector inpnets; if (znet != nullptr) { for (auto user : znet->users) { if (is_lut(ctx, user.cell) && user.cell != ci && @@ -229,14 +230,71 @@ class Ecp5Packer } } } + + // Pack LUTs feeding the same CCU2, RAM or DFF into a SLICE + if (znet != nullptr && znet->users.size() < 10) { + for (auto user : znet->users) { + if (is_lc(ctx, user.cell) || user.cell->type == ctx->id("DP16KD") || is_ff(ctx, user.cell)) { + for (auto port : user.cell->ports) { + if (port.second.type != PORT_IN || port.second.net == nullptr || port.second.net == znet) + continue; + if (port.second.net->users.size() > 10) + continue; + CellInfo *drv = port.second.net->driver.cell; + if (drv == nullptr) + continue; + if (is_lut(ctx, drv) && !procdLuts.count(drv->name) && can_pack_lutff(ci->name, drv->name)) { + procdLuts.insert(ci->name); + procdLuts.insert(drv->name); + lutPairs[ci->name] = drv->name; + goto paired_inlut; + } + + } + } + } + } + + // Pack LUTs sharing an input with a simple fanout-based heuristic + for (const char *inp : {"A", "B", "C", "D"}) { + NetInfo *innet = ci->ports.at(ctx->id(inp)).net; + if (innet != nullptr && innet->users.size() < 5 && innet->users.size() > 1) + inpnets.push_back(innet); + } + std::sort(inpnets.begin(), inpnets.end(), [&](const NetInfo *a, const NetInfo *b) { + return a->users.size() < b->users.size(); + }); + for (auto inet : inpnets) { + for (auto &user : inet->users) { + if (user.cell == nullptr || user.cell == ci || !is_lut(ctx, user.cell)) + continue; + if (procdLuts.count(user.cell->name)) + continue; + if (can_pack_lutff(ci->name, user.cell->name)) { + procdLuts.insert(ci->name); + procdLuts.insert(user.cell->name); + lutPairs[ci->name] = user.cell->name; + goto paired_inlut; + } + + } + } + if (false) { paired_inlut: continue; } } } + if (ctx->debug) { + log_info("Singleton LUTs (packer QoR debug): \n"); + for (auto cell : sorted(ctx->cells)) + if (is_lut(ctx, cell.second) && !procdLuts.count(cell.first)) + log_info(" %s\n", cell.first.c_str(ctx)); + } } + // Return true if an port is a top level port that provides its own IOBUF bool is_top_port(PortRef &port) { -- cgit v1.2.3 From df79d94944b4d92207be7ddedc6424b7c931f313 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:07:55 +0000 Subject: ecp5: DELAY fixes Signed-off-by: David Shah --- ecp5/arch.cc | 6 +----- ecp5/pack.cc | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/ecp5/arch.cc b/ecp5/arch.cc index cd5fa0cb..da0f7b1a 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -504,17 +504,13 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -<<<<<<< HEAD -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } -======= bool Arch::place() { - bool result = placer_heap(getCtx()); + bool result = placer1(getCtx(), Placer1Cfg(getCtx())); if (result) permute_luts(); return result; } ->>>>>>> 136e030... lut permutation bool Arch::route() { diff --git a/ecp5/pack.cc b/ecp5/pack.cc index 9f987f35..1b07c2ae 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -236,20 +236,21 @@ class Ecp5Packer for (auto user : znet->users) { if (is_lc(ctx, user.cell) || user.cell->type == ctx->id("DP16KD") || is_ff(ctx, user.cell)) { for (auto port : user.cell->ports) { - if (port.second.type != PORT_IN || port.second.net == nullptr || port.second.net == znet) + if (port.second.type != PORT_IN || port.second.net == nullptr || + port.second.net == znet) continue; if (port.second.net->users.size() > 10) continue; CellInfo *drv = port.second.net->driver.cell; if (drv == nullptr) continue; - if (is_lut(ctx, drv) && !procdLuts.count(drv->name) && can_pack_lutff(ci->name, drv->name)) { + if (is_lut(ctx, drv) && !procdLuts.count(drv->name) && + can_pack_lutff(ci->name, drv->name)) { procdLuts.insert(ci->name); procdLuts.insert(drv->name); lutPairs[ci->name] = drv->name; goto paired_inlut; } - } } } @@ -261,9 +262,8 @@ class Ecp5Packer if (innet != nullptr && innet->users.size() < 5 && innet->users.size() > 1) inpnets.push_back(innet); } - std::sort(inpnets.begin(), inpnets.end(), [&](const NetInfo *a, const NetInfo *b) { - return a->users.size() < b->users.size(); - }); + std::sort(inpnets.begin(), inpnets.end(), + [&](const NetInfo *a, const NetInfo *b) { return a->users.size() < b->users.size(); }); for (auto inet : inpnets) { for (auto &user : inet->users) { if (user.cell == nullptr || user.cell == ci || !is_lut(ctx, user.cell)) @@ -276,7 +276,6 @@ class Ecp5Packer lutPairs[ci->name] = user.cell->name; goto paired_inlut; } - } } @@ -294,7 +293,6 @@ class Ecp5Packer } } - // Return true if an port is a top level port that provides its own IOBUF bool is_top_port(PortRef &port) { @@ -999,11 +997,11 @@ class Ecp5Packer if (is_lut(ctx, ci)) { std::unique_ptr slice = create_ecp5_cell(ctx, ctx->id("TRELLIS_SLICE"), ci->name.str(ctx) + "_SLICE"); - lut_to_slice(ctx, ci, slice.get(), 0); + lut_to_slice(ctx, ci, slice.get(), 1); auto ff = lutffPairs.find(ci->name); if (ff != lutffPairs.end()) { - ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 0, true); + ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 1, true); packed_cells.insert(ff->second); fflutPairs.erase(ff->second); lutffPairs.erase(ci->name); @@ -1884,7 +1882,8 @@ class Ecp5Packer } iol->params[ctx->id("DELAY.DEL_VALUE")] = std::to_string(lookup_delay(str_or_default(ci->params, ctx->id("DEL_MODE"), "USER_DEFINED"))); - if (ci->params.count(ctx->id("DEL_VALUE")) && ci->params.at(ctx->id("DEL_VALUE")) != "DELAY0") + if (ci->params.count(ctx->id("DEL_VALUE")) && + ci->params.at(ctx->id("DEL_VALUE")).substr(0, 5) != "DELAY") iol->params[ctx->id("DELAY.DEL_VALUE")] = ci->params.at(ctx->id("DEL_VALUE")); if (ci->ports.count(id_LOADN)) replace_port(ci, id_LOADN, iol, id_LOADN); -- cgit v1.2.3