diff options
| author | David Shah <davey1576@gmail.com> | 2019-02-25 12:46:06 +0000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-25 12:46:06 +0000 | 
| commit | 031725c80eb6c2c0f922b6fa5be57c42330a8a3b (patch) | |
| tree | 49215217265802745407465c41f9b493e74f47b1 | |
| parent | e87fb696653262bea08caa100f0a5d4d31d2a310 (diff) | |
| parent | df79d94944b4d92207be7ddedc6424b7c931f313 (diff) | |
| download | nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.tar.gz nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.tar.bz2 nextpnr-031725c80eb6c2c0f922b6fa5be57c42330a8a3b.zip | |
Merge pull request #242 from YosysHQ/ecp5next
ecp5: Fixes and QoR improvements
| -rw-r--r-- | common/timing.cc | 9 | ||||
| -rw-r--r-- | ecp5/arch.cc | 91 | ||||
| -rw-r--r-- | ecp5/arch.h | 4 | ||||
| -rw-r--r-- | ecp5/arch_place.cc | 74 | ||||
| -rw-r--r-- | ecp5/archdefs.h | 2 | ||||
| -rw-r--r-- | ecp5/globals.cc | 19 | ||||
| -rw-r--r-- | ecp5/pack.cc | 67 | ||||
| -rwxr-xr-x | ecp5/trellis_import.py | 10 | 
8 files changed, 238 insertions, 38 deletions
| diff --git a/common/timing.cc b/common/timing.cc index 64dcdf71..8ade2660 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -611,8 +611,9 @@ struct Timing                          continue;                      delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay;                      for (size_t i = 0; i < net->users.size(); i++) { -                        float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); -                        nc.criticality.at(i) = criticality; +                        float criticality = +                                1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); +                        nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality));                      }                      nc.max_path_length = nd.max_path_length;                      nc.cd_worst_slack = worst_slack.at(startdomain.first); @@ -837,6 +838,10 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p                      auto cursor = sink_wire;                      delay_t delay;                      while (driver_wire != cursor) { +#ifdef ARCH_ECP5 +                        if (net->is_global) +                            break; +#endif                          auto it = net->wires.find(cursor);                          assert(it != net->wires.end());                          auto pip = it->second.pip; diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 7de5c7aa..da0f7b1a 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -427,44 +427,90 @@ BelId Arch::getBelByLocation(Loc loc) const  delay_t Arch::estimateDelay(WireId src, WireId dst) const  { -    auto est_location = [&](WireId w) -> std::pair<int16_t, int16_t> { -        if (w.location.x == 0 && w.location.y == 0) { -            // Global wires -            const auto &wire = locInfo(w)->wire_data[w.index]; -            // Use location of first downhill bel or pip, if available -            if (wire.num_bel_pins > 0) { -                return std::make_pair(wire.bel_pins[0].rel_bel_loc.x, wire.bel_pins[0].rel_bel_loc.y); -            } else if (wire.num_downhill > 0) { -                return std::make_pair(wire.pips_downhill[0].rel_loc.x, wire.pips_downhill[0].rel_loc.y); -            } else if (wire.num_uphill > 0) { -                return std::make_pair(wire.pips_uphill[0].rel_loc.x, wire.pips_uphill[0].rel_loc.y); -            } else { -                return std::make_pair<int16_t, int16_t>(0, 0); -            } +    WireId cursor = dst; + +    int num_uh = locInfo(dst)->wire_data[dst.index].num_uphill; +    if (num_uh < 6) { +        for (auto uh : getPipsUphill(dst)) { +            if (getPipSrcWire(uh) == src) +                return getPipDelay(uh).maxDelay(); +        } +    } + +    auto est_location = [&](WireId w) -> std::pair<int, int> { +        const auto &wire = locInfo(w)->wire_data[w.index]; +        if (wire.num_bel_pins > 0) { +            return std::make_pair(w.location.x + wire.bel_pins[0].rel_bel_loc.x, +                                  w.location.y + wire.bel_pins[0].rel_bel_loc.y); +        } else if (wire.num_downhill > 0) { +            return std::make_pair(w.location.x + wire.pips_downhill[0].rel_loc.x, +                                  w.location.y + wire.pips_downhill[0].rel_loc.y); +        } else if (wire.num_uphill > 0) { +            return std::make_pair(w.location.x + wire.pips_uphill[0].rel_loc.x, +                                  w.location.y + wire.pips_uphill[0].rel_loc.y);          } else { -            return std::make_pair(w.location.x, w.location.y); +            return std::make_pair(int(w.location.x), int(w.location.y));          }      };      auto src_loc = est_location(src), dst_loc = est_location(dst); -    return (240 - 20 * args.speed) * (abs(src_loc.first - dst_loc.first) + abs(src_loc.second - dst_loc.second)); +    int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); +    return (130 - 25 * args.speed) * +           (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5)));  }  delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const  {      const auto &driver = net_info->driver; +    if ((driver.port == id_FCO && sink.port == id_FCI) || sink.port == id_FXA || sink.port == id_FXB) +        return 0;      auto driver_loc = getBelLocation(driver.cell->bel);      auto sink_loc = getBelLocation(sink.cell->bel); -    return (240 - 20 * args.speed) * (abs(driver_loc.x - sink_loc.x) + abs(driver_loc.y - sink_loc.y)); +    // Encourage use of direct interconnect +    if (driver_loc.x == sink_loc.x && driver_loc.y == sink_loc.y) { +        if ((sink.port == id_A0 || sink.port == id_A1) && (driver.port == id_F1) && +            (driver_loc.z == 2 || driver_loc.z == 3)) +            return 0; +        if ((sink.port == id_B0 || sink.port == id_B1) && (driver.port == id_F1) && +            (driver_loc.z == 0 || driver_loc.z == 1)) +            return 0; +        if ((sink.port == id_C0 || sink.port == id_C1) && (driver.port == id_F0) && +            (driver_loc.z == 2 || driver_loc.z == 3)) +            return 0; +        if ((sink.port == id_D0 || sink.port == id_D1) && (driver.port == id_F0) && +            (driver_loc.z == 0 || driver_loc.z == 1)) +            return 0; +    } + +    int dx = abs(driver_loc.x - sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); +    return (130 - 25 * args.speed) * +           (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5)));  } -bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const { return false; } +bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t &budget) const +{ +    if (net_info->driver.port == id_FCO && sink.port == id_FCI) { +        budget = 0; +        return true; +    } else if (sink.port == id_FXA || sink.port == id_FXB) { +        budget = 0; +        return true; +    } else { +        return false; +    } +}  // ----------------------------------------------------------------------- -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +bool Arch::place() +{ +    bool result = placer1(getCtx(), Placer1Cfg(getCtx())); +    if (result) +        permute_luts(); +    return result; +}  bool Arch::route()  { @@ -602,7 +648,7 @@ bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort      // Data for -8 grade      if (cell->type == id_TRELLIS_SLICE) { -        bool has_carry = str_or_default(cell->params, id("MODE"), "LOGIC") == "CCU2"; +        bool has_carry = cell->sliceInfo.is_carry;          if (fromPort == id_A0 || fromPort == id_B0 || fromPort == id_C0 || fromPort == id_D0 || fromPort == id_A1 ||              fromPort == id_B1 || fromPort == id_C1 || fromPort == id_D1 || fromPort == id_M0 || fromPort == id_M1 ||              fromPort == id_FXA || fromPort == id_FXB || fromPort == id_FCI) { @@ -639,7 +685,7 @@ TimingPortClass Arch::getPortTimingClass(const CellInfo *cell, IdString port, in      auto disconnected = [cell](IdString p) { return !cell->ports.count(p) || cell->ports.at(p).net == nullptr; };      clockInfoCount = 0;      if (cell->type == id_TRELLIS_SLICE) { -        int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); +        int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1;          if (port == id_CLK || port == id_WCK)              return TMG_CLOCK_INPUT;          if (port == id_A0 || port == id_A1 || port == id_B0 || port == id_B1 || port == id_C0 || port == id_C1 || @@ -782,8 +828,7 @@ TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port      info.hold = getDelayFromNS(0);      info.clockToQ = getDelayFromNS(0);      if (cell->type == id_TRELLIS_SLICE) { -        int sd0 = int_or_default(cell->params, id("REG0_SD"), 0), sd1 = int_or_default(cell->params, id("REG1_SD"), 0); - +        int sd0 = cell->sliceInfo.sd0, sd1 = cell->sliceInfo.sd1;          if (port == id_WD0 || port == id_WD1 || port == id_WAD0 || port == id_WAD1 || port == id_WAD2 ||              port == id_WAD3 || port == id_WRE) {              info.edge = RISING_EDGE; diff --git a/ecp5/arch.h b/ecp5/arch.h index 6a2f2bf5..59c8638d 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -918,7 +918,7 @@ struct Arch : BaseCtx      delay_t estimateDelay(WireId src, WireId dst) const;      delay_t predictDelay(const NetInfo *net_info, const PortRef &sink) const;      delay_t getDelayEpsilon() const { return 20; } -    delay_t getRipupDelayPenalty() const { return 200; } +    delay_t getRipupDelayPenalty() const { return 250; }      float getDelayNS(delay_t v) const { return v * 0.001; }      DelayInfo getDelayFromNS(float ns) const      { @@ -971,6 +971,8 @@ struct Arch : BaseCtx      void assignArchInfo(); +    void permute_luts(); +      std::vector<std::pair<std::string, std::string>> getTilesAtLocation(int row, int col);      std::string getTileByTypeAndLocation(int row, int col, std::string type) const      { diff --git a/ecp5/arch_place.cc b/ecp5/arch_place.cc index ff70bb5a..e5c9b31f 100644 --- a/ecp5/arch_place.cc +++ b/ecp5/arch_place.cc @@ -18,8 +18,10 @@   */  #include "cells.h" +#include "design_utils.h"  #include "log.h"  #include "nextpnr.h" +#include "timing.h"  #include "util.h"  NEXTPNR_NAMESPACE_BEGIN @@ -115,4 +117,76 @@ bool Arch::isValidBelForCell(CellInfo *cell, BelId bel) const      }  } +void Arch::permute_luts() +{ +    NetCriticalityMap nc; +    get_criticalities(getCtx(), &nc); + +    std::unordered_map<PortInfo *, size_t> port_to_user; +    for (auto net : sorted(nets)) { +        NetInfo *ni = net.second; +        for (size_t i = 0; i < ni->users.size(); i++) { +            auto &usr = ni->users.at(i); +            port_to_user[&(usr.cell->ports.at(usr.port))] = i; +        } +    } + +    auto proc_lut = [&](CellInfo *ci, int lut) { +        std::vector<IdString> port_names; +        for (int i = 0; i < 4; i++) +            port_names.push_back(id(std::string("ABCD").substr(i, 1) + std::to_string(lut))); + +        std::vector<std::pair<float, int>> inputs; +        std::vector<NetInfo *> orig_nets; + +        for (int i = 0; i < 4; i++) { +            auto &port = ci->ports.at(port_names.at(i)); +            float crit = 0; +            if (port.net != nullptr && nc.count(port.net->name)) { +                auto &n = nc.at(port.net->name); +                size_t usr = port_to_user.at(&port); +                if (usr < n.criticality.size()) +                    crit = n.criticality.at(usr); +            } +            orig_nets.push_back(port.net); +            inputs.emplace_back(crit, i); +        } +        // Least critical first (A input is slowest) +        std::sort(inputs.begin(), inputs.end()); +        for (int i = 0; i < 4; i++) { +            IdString p = port_names.at(i); +            // log_info("%s %s %f\n", p.c_str(ctx), port_names.at(inputs.at(i).second).c_str(ctx), inputs.at(i).first); +            disconnect_port(getCtx(), ci, p); +            ci->ports.at(p).net = nullptr; +            if (orig_nets.at(inputs.at(i).second) != nullptr) { +                connect_port(getCtx(), orig_nets.at(inputs.at(i).second), ci, p); +                ci->params[id(p.str(this) + "MUX")] = p.str(this); +            } else { +                ci->params[id(p.str(this) + "MUX")] = "1"; +            } +        } +        // Rewrite function +        int old_init = int_or_default(ci->params, id("LUT" + std::to_string(lut) + "_INITVAL"), 0); +        int new_init = 0; +        for (int i = 0; i < 16; i++) { +            int old_index = 0; +            for (int k = 0; k < 4; k++) { +                if (i & (1 << k)) +                    old_index |= (1 << inputs.at(k).second); +            } +            if (old_init & (1 << old_index)) +                new_init |= (1 << i); +        } +        ci->params[id("LUT" + std::to_string(lut) + "_INITVAL")] = std::to_string(new_init); +    }; + +    for (auto cell : sorted(cells)) { +        CellInfo *ci = cell.second; +        if (ci->type == id_TRELLIS_SLICE && str_or_default(ci->params, id("MODE"), "LOGIC") == "LOGIC") { +            proc_lut(ci, 0); +            proc_lut(ci, 1); +        } +    } +} +  NEXTPNR_NAMESPACE_END diff --git a/ecp5/archdefs.h b/ecp5/archdefs.h index bfc5769b..d7ea0a8e 100644 --- a/ecp5/archdefs.h +++ b/ecp5/archdefs.h @@ -159,7 +159,9 @@ struct ArchCellInfo      {          bool using_dff;          bool has_l6mux; +        bool is_carry;          IdString clk_sig, lsr_sig, clkmux, lsrmux, srmode; +        int sd0, sd1;      } sliceInfo;  }; diff --git a/ecp5/globals.cc b/ecp5/globals.cc index 49947b20..fae2c683 100644 --- a/ecp5/globals.cc +++ b/ecp5/globals.cc @@ -448,6 +448,8 @@ class Ecp5GlobalRouter              if (i < 8)                  fab_globals.insert(i);          } +        std::vector<std::pair<PortRef *, int>> toroute; +        std::unordered_map<int, NetInfo *> clocks;          for (auto cell : sorted(ctx->cells)) {              CellInfo *ci = cell.second;              if (ci->type == id_DCCA) { @@ -472,15 +474,18 @@ class Ecp5GlobalRouter                  NPNR_ASSERT(routed);                  // WCK must have routing priority -                auto sorted_users = clock->users; -                std::sort(sorted_users.begin(), sorted_users.end(), [this](const PortRef &a, const PortRef &b) { -                    return global_route_priority(a) < global_route_priority(b); -                }); -                for (const auto &user : sorted_users) { -                    route_logic_tile_global(clock, glbid, user); -                } +                for (auto &user : clock->users) +                    toroute.emplace_back(&user, glbid); +                clocks[glbid] = clock;              }          } +        std::sort(toroute.begin(), toroute.end(), +                  [this](const std::pair<PortRef *, int> &a, const std::pair<PortRef *, int> &b) { +                      return global_route_priority(*a.first) < global_route_priority(*b.first); +                  }); +        for (const auto &user : toroute) { +            route_logic_tile_global(clocks.at(user.second), user.second, *user.first); +        }      }  };  void promote_ecp5_globals(Context *ctx) { Ecp5GlobalRouter(ctx).promote_globals(); } diff --git a/ecp5/pack.cc b/ecp5/pack.cc index 64682fd2..1b07c2ae 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -27,6 +27,7 @@  #include "design_utils.h"  #include "globals.h"  #include "log.h" +#include "timing.h"  #include "util.h"  NEXTPNR_NAMESPACE_BEGIN @@ -164,6 +165,7 @@ class Ecp5Packer              CellInfo *ci = cell.second;              if (is_lut(ctx, ci) && procdLuts.find(cell.first) == procdLuts.end()) {                  NetInfo *znet = ci->ports.at(ctx->id("Z")).net; +                std::vector<NetInfo *> inpnets;                  if (znet != nullptr) {                      for (auto user : znet->users) {                          if (is_lut(ctx, user.cell) && user.cell != ci && @@ -228,12 +230,67 @@ class Ecp5Packer                          }                      }                  } + +                // Pack LUTs feeding the same CCU2, RAM or DFF into a SLICE +                if (znet != nullptr && znet->users.size() < 10) { +                    for (auto user : znet->users) { +                        if (is_lc(ctx, user.cell) || user.cell->type == ctx->id("DP16KD") || is_ff(ctx, user.cell)) { +                            for (auto port : user.cell->ports) { +                                if (port.second.type != PORT_IN || port.second.net == nullptr || +                                    port.second.net == znet) +                                    continue; +                                if (port.second.net->users.size() > 10) +                                    continue; +                                CellInfo *drv = port.second.net->driver.cell; +                                if (drv == nullptr) +                                    continue; +                                if (is_lut(ctx, drv) && !procdLuts.count(drv->name) && +                                    can_pack_lutff(ci->name, drv->name)) { +                                    procdLuts.insert(ci->name); +                                    procdLuts.insert(drv->name); +                                    lutPairs[ci->name] = drv->name; +                                    goto paired_inlut; +                                } +                            } +                        } +                    } +                } + +                // Pack LUTs sharing an input with a simple fanout-based heuristic +                for (const char *inp : {"A", "B", "C", "D"}) { +                    NetInfo *innet = ci->ports.at(ctx->id(inp)).net; +                    if (innet != nullptr && innet->users.size() < 5 && innet->users.size() > 1) +                        inpnets.push_back(innet); +                } +                std::sort(inpnets.begin(), inpnets.end(), +                          [&](const NetInfo *a, const NetInfo *b) { return a->users.size() < b->users.size(); }); +                for (auto inet : inpnets) { +                    for (auto &user : inet->users) { +                        if (user.cell == nullptr || user.cell == ci || !is_lut(ctx, user.cell)) +                            continue; +                        if (procdLuts.count(user.cell->name)) +                            continue; +                        if (can_pack_lutff(ci->name, user.cell->name)) { +                            procdLuts.insert(ci->name); +                            procdLuts.insert(user.cell->name); +                            lutPairs[ci->name] = user.cell->name; +                            goto paired_inlut; +                        } +                    } +                } +                  if (false) {                  paired_inlut:                      continue;                  }              }          } +        if (ctx->debug) { +            log_info("Singleton LUTs (packer QoR debug): \n"); +            for (auto cell : sorted(ctx->cells)) +                if (is_lut(ctx, cell.second) && !procdLuts.count(cell.first)) +                    log_info("     %s\n", cell.first.c_str(ctx)); +        }      }      // Return true if an port is a top level port that provides its own IOBUF @@ -940,11 +997,11 @@ class Ecp5Packer              if (is_lut(ctx, ci)) {                  std::unique_ptr<CellInfo> slice =                          create_ecp5_cell(ctx, ctx->id("TRELLIS_SLICE"), ci->name.str(ctx) + "_SLICE"); -                lut_to_slice(ctx, ci, slice.get(), 0); +                lut_to_slice(ctx, ci, slice.get(), 1);                  auto ff = lutffPairs.find(ci->name);                  if (ff != lutffPairs.end()) { -                    ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 0, true); +                    ff_to_slice(ctx, ctx->cells.at(ff->second).get(), slice.get(), 1, true);                      packed_cells.insert(ff->second);                      fflutPairs.erase(ff->second);                      lutffPairs.erase(ci->name); @@ -1825,7 +1882,8 @@ class Ecp5Packer                  }                  iol->params[ctx->id("DELAY.DEL_VALUE")] =                          std::to_string(lookup_delay(str_or_default(ci->params, ctx->id("DEL_MODE"), "USER_DEFINED"))); -                if (ci->params.count(ctx->id("DEL_VALUE")) && ci->params.at(ctx->id("DEL_VALUE")) != "DELAY0") +                if (ci->params.count(ctx->id("DEL_VALUE")) && +                    ci->params.at(ctx->id("DEL_VALUE")).substr(0, 5) != "DELAY")                      iol->params[ctx->id("DELAY.DEL_VALUE")] = ci->params.at(ctx->id("DEL_VALUE"));                  if (ci->ports.count(id_LOADN))                      replace_port(ci, id_LOADN, iol, id_LOADN); @@ -2388,6 +2446,9 @@ void Arch::assignArchInfo()              ci->sliceInfo.clkmux = id(str_or_default(ci->params, id_CLKMUX, "CLK"));              ci->sliceInfo.lsrmux = id(str_or_default(ci->params, id_LSRMUX, "LSR"));              ci->sliceInfo.srmode = id(str_or_default(ci->params, id_SRMODE, "LSR_OVER_CE")); +            ci->sliceInfo.is_carry = str_or_default(ci->params, id("MODE"), "LOGIC") == "CCU2"; +            ci->sliceInfo.sd0 = int_or_default(ci->params, id("REG0_SD"), 0); +            ci->sliceInfo.sd1 = int_or_default(ci->params, id("REG1_SD"), 0);              ci->sliceInfo.has_l6mux = false;              if (ci->ports.count(id_FXA) && ci->ports[id_FXA].net != nullptr &&                  ci->ports[id_FXA].net->driver.port == id_OFX0) diff --git a/ecp5/trellis_import.py b/ecp5/trellis_import.py index 6acc32c5..610bd331 100755 --- a/ecp5/trellis_import.py +++ b/ecp5/trellis_import.py @@ -153,7 +153,7 @@ speed_grade_names = ["6", "7", "8", "8_5G"]  speed_grade_cells = {}  speed_grade_pips = {} -pip_class_to_idx = {"default": 0} +pip_class_to_idx = {"default": 0, "zero": 1}  timing_port_xform = {      "RAD0": "D0", @@ -199,7 +199,7 @@ def process_timing_data():          pip_class_delays = []          for i in range(len(pip_class_to_idx)):              pip_class_delays.append((50, 50, 0, 0)) - +        pip_class_delays[pip_class_to_idx["zero"]] = (0, 0, 0, 0)          with open(timing_dbs.interconnect_db_path("ECP5", grade)) as f:              interconn_data = json.load(f)          for pipclass, pipdata in sorted(interconn_data.items()): @@ -219,6 +219,12 @@ def process_timing_data():  def get_pip_class(wire_from, wire_to): + +    if "FCO" in wire_from or "FCI" in wire_to: +        return pip_class_to_idx["zero"] +    if "F5" in wire_from or "FX" in wire_from or "FXA" in wire_to or "FXB" in wire_to: +        return pip_class_to_idx["zero"] +      class_name = pip_classes.get_pip_class(wire_from, wire_to)      if class_name is None or class_name not in pip_class_to_idx:          class_name = "default" | 
