aboutsummaryrefslogtreecommitdiffstats
path: root/common
diff options
context:
space:
mode:
Diffstat (limited to 'common')
-rw-r--r--common/nextpnr.h15
-rw-r--r--common/placer1.cc5
-rw-r--r--common/timing.cc337
3 files changed, 268 insertions, 89 deletions
diff --git a/common/nextpnr.h b/common/nextpnr.h
index 92880089..6195329b 100644
--- a/common/nextpnr.h
+++ b/common/nextpnr.h
@@ -291,6 +291,19 @@ struct CellInfo : ArchCellInfo
// parent.[xyz] := 0 when (constr_parent == nullptr)
};
+enum TimingPortClass // Timing role of a cell port; this is the type returned by getPortTimingClass() in timing.cc
+{
+ TMG_CLOCK_INPUT, // Clock input to a sequential cell
+ TMG_GEN_CLOCK, // Generated clock output (PLL, DCC, etc)
+ TMG_REGISTER_INPUT, // Input to a register, with an associated clock (may also have comb. fanout too)
+ TMG_REGISTER_OUTPUT, // Output from a register
+ TMG_COMB_INPUT, // Combinational input, no paths end here
+ TMG_COMB_OUTPUT, // Combinational output, no paths start here
+ TMG_STARTPOINT, // Unclocked primary startpoint, such as an IO cell output
+ TMG_ENDPOINT, // Unclocked primary endpoint, such as an IO cell input
+ TMG_IGNORE, // Asynchronous to all clocks, "don't care", and should be ignored (false path) for analysis
+};
+
struct DeterministicRNG
{
uint64_t rngstate;
@@ -440,7 +453,7 @@ struct BaseCtx
const Context *getCtx() const { return reinterpret_cast<const Context *>(this); }
- template<typename T> const char *nameOf(const T *obj)
+ template <typename T> const char *nameOf(const T *obj)
{
if (obj == nullptr)
return "";
diff --git a/common/placer1.cc b/common/placer1.cc
index 1d00e77a..51ef271a 100644
--- a/common/placer1.cc
+++ b/common/placer1.cc
@@ -236,7 +236,10 @@ class SAPlacer
temp = post_legalise_temp;
diameter *= post_legalise_dia_scale;
ctx->shuffle(autoplaced);
- assign_budget(ctx);
+
+ // Legalisation is a big change so force a slack redistribution here
+ if (ctx->slack_redist_iter > 0)
+ assign_budget(ctx, true /* quiet */);
} else if (ctx->slack_redist_iter > 0 && iter % ctx->slack_redist_iter == 0) {
assign_budget(ctx, true /* quiet */);
}
diff --git a/common/timing.cc b/common/timing.cc
index c00e1ba5..aadd8381 100644
--- a/common/timing.cc
+++ b/common/timing.cc
@@ -20,6 +20,7 @@
#include "timing.h"
#include <algorithm>
+#include <boost/range/adaptor/reversed.hpp>
#include <unordered_map>
#include <utility>
#include "log.h"
@@ -36,10 +37,18 @@ struct Timing
bool net_delays;
bool update;
delay_t min_slack;
- PortRefVector current_path;
PortRefVector *crit_path;
DelayFrequency *slack_histogram;
+ struct TimingData // Per-net bookkeeping for walk_paths(), which keeps one entry per net in its net_data map
+ {
+ TimingData() : max_arrival(), max_path_length(), min_remaining_budget() {} // Value-initialises every field
+ TimingData(delay_t max_arrival) : max_arrival(max_arrival), max_path_length(), min_remaining_budget() {} // Seeds the arrival time (walk_paths passes a register's clock-to-Q delay)
+ delay_t max_arrival; // Latest arrival time found for this net over all fanin paths (forward pass)
+ unsigned max_path_length = 0; // Largest count of upstream nets on any path to this net; hops with a budget override are not counted
+ delay_t min_remaining_budget; // Reset to the clock period in the forward pass, then lowered in the backward pass to the least path budget left after a sink's share
+ };
+
Timing(Context *ctx, bool net_delays, bool update, PortRefVector *crit_path = nullptr,
DelayFrequency *slack_histogram = nullptr)
: ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->target_freq),
@@ -47,93 +56,246 @@ struct Timing
{
}
- delay_t follow_net(NetInfo *net, int path_length, delay_t slack)
+ delay_t walk_paths()
{
- const delay_t default_budget = slack / (path_length + 1);
- delay_t net_budget = default_budget;
- for (auto &usr : net->users) {
- auto delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();
- if (crit_path)
- current_path.push_back(&usr);
- // If budget override exists, use that value and do not increment path_length
- auto budget = default_budget;
- if (ctx->getBudgetOverride(net, usr, budget)) {
- if (update)
- usr.budget = std::min(usr.budget, budget);
- budget = follow_user_port(usr, path_length, slack - budget);
- net_budget = std::min(net_budget, budget);
+ const auto clk_period = delay_t(1.0e12 / ctx->target_freq);
+
+ // First, compute the topological order of nets to walk through the circuit, assuming it is an _acyclic_ graph
+ // TODO(eddieh): Handle the case where it is cyclic, e.g. combinatorial loops
+ std::vector<NetInfo *> topographical_order;
+ std::unordered_map<const NetInfo *, TimingData> net_data;
+ // In lieu of deleting edges from the graph, simply count the number of fanins to each output port
+ std::unordered_map<const PortInfo *, unsigned> port_fanin;
+
+ std::vector<IdString> input_ports;
+ std::vector<const PortInfo *> output_ports;
+ for (auto &cell : ctx->cells) {
+ input_ports.clear();
+ output_ports.clear();
+ for (auto &port : cell.second->ports) {
+ if (!port.second.net)
+ continue;
+ if (port.second.type == PORT_OUT)
+ output_ports.push_back(&port.second);
+ else
+ input_ports.push_back(port.first);
}
- else {
- budget = follow_user_port(usr, path_length + 1, slack - delay);
- net_budget = std::min(net_budget, budget);
- if (update)
- usr.budget = std::min(usr.budget, delay + budget);
+
+ for (auto o : output_ports) {
+ IdString clockPort;
+ TimingPortClass portClass = ctx->getPortTimingClass(cell.second.get(), o->name, clockPort);
+ // If output port is influenced by a clock (e.g. FF output) then add it to the ordering as a timing
+ // start-point
+ if (portClass == TMG_REGISTER_OUTPUT) {
+ DelayInfo clkToQ;
+ ctx->getCellDelay(cell.second.get(), clockPort, o->name, clkToQ);
+ topographical_order.emplace_back(o->net);
+ net_data.emplace(o->net, TimingData{clkToQ.maxDelay()});
+ } else {
+ // TODO(eddieh): Generated clocks and ignored ports are currently added into the ordering as if they
+ // were regular timing start points, in order to enable the full topological order to be computed;
+ // however, these false nets (and their downstream paths) should not be in the final ordering
+ if (portClass == TMG_STARTPOINT || portClass == TMG_GEN_CLOCK || portClass == TMG_IGNORE) {
+ topographical_order.emplace_back(o->net);
+ net_data.emplace(o->net, TimingData{});
+ }
+ // Otherwise, for all driven input ports on this cell, if a timing arc exists between the input and
+ // the current output port, increment fanin counter
+ for (auto i : input_ports) {
+ DelayInfo comb_delay;
+ bool is_path = ctx->getCellDelay(cell.second.get(), i, o->name, comb_delay);
+ if (is_path)
+ port_fanin[o]++;
+ }
+ }
}
- if (crit_path)
- current_path.pop_back();
}
- return net_budget;
- }
- // Follow a path, returning budget to annotate
- delay_t follow_user_port(PortRef &user, int path_length, delay_t slack)
- {
- delay_t value;
- if (ctx->getPortClock(user.cell, user.port) != IdString()) {
- // At the end of a timing path (arguably, should check setup time
- // here too)
- value = slack / path_length;
- if (slack < min_slack) {
- min_slack = slack;
- if (crit_path)
- *crit_path = current_path;
+ // If these constant nets exist, add them to the topographical ordering too
+ // TODO(eddieh): These are also false paths and should be removed from the ordering
+ auto it = ctx->nets.find(ctx->id("$PACKER_VCC_NET"));
+ if (it != ctx->nets.end()) {
+ topographical_order.emplace_back(it->second.get());
+ net_data.emplace(it->second.get(), TimingData{});
+ }
+ it = ctx->nets.find(ctx->id("$PACKER_GND_NET"));
+ if (it != ctx->nets.end()) {
+ topographical_order.emplace_back(it->second.get());
+ net_data.emplace(it->second.get(), TimingData{});
+ }
+
+ std::deque<NetInfo *> queue(topographical_order.begin(), topographical_order.end());
+
+ // Now walk the design, from the start points identified previously, building up a topological order
+ while (!queue.empty()) {
+ const auto net = queue.front();
+ queue.pop_front();
+
+ for (auto &usr : net->users) {
+ IdString clockPort;
+ TimingPortClass usrClass = ctx->getPortTimingClass(usr.cell, usr.port, clockPort);
+ if (usrClass == TMG_IGNORE || usrClass == TMG_CLOCK_INPUT)
+ continue;
+ for (auto &port : usr.cell->ports) {
+ if (port.second.type != PORT_OUT || !port.second.net)
+ continue;
+ TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, port.first, clockPort);
+
+ // Skip if this is a clocked output (but allow non-clocked ones)
+ if (portClass == TMG_REGISTER_OUTPUT || portClass == TMG_STARTPOINT || portClass == TMG_IGNORE ||
+ portClass == TMG_GEN_CLOCK)
+ continue;
+ DelayInfo comb_delay;
+ bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
+ if (!is_path)
+ continue;
+ // Decrement the fanin count, and only add the net to the topological order once all of its fanins
+ // have already been visited
+ auto it = port_fanin.find(&port.second);
+ NPNR_ASSERT(it != port_fanin.end());
+ if (--it->second == 0) {
+ topographical_order.emplace_back(port.second.net);
+ queue.emplace_back(port.second.net);
+ port_fanin.erase(it);
+ }
+ }
}
- if (slack_histogram) {
- int slack_ps = ctx->getDelayNS(slack) * 1000;
- (*slack_histogram)[slack_ps]++;
+ }
+
+ // Sanity check to ensure that all ports where fanins were recorded were indeed visited
+ NPNR_ASSERT(port_fanin.empty());
+
+ // Go forwards in topological order to find the maximum arrival time and max path length for each net
+ for (auto net : topographical_order) {
+ auto &nd = net_data.at(net);
+ const auto net_arrival = nd.max_arrival;
+ const auto net_length_plus_one = nd.max_path_length + 1;
+ nd.min_remaining_budget = clk_period;
+ for (auto &usr : net->users) {
+ IdString clockPort;
+ TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, clockPort);
+ if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT || portClass == TMG_IGNORE) {
+ } else {
+ auto net_delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();
+ auto budget_override = ctx->getBudgetOverride(net, usr, net_delay);
+ auto usr_arrival = net_arrival + net_delay;
+ // Iterate over all output ports on the same cell as the sink
+ for (auto port : usr.cell->ports) {
+ if (port.second.type != PORT_OUT || !port.second.net)
+ continue;
+ DelayInfo comb_delay;
+ // Look up delay through this path
+ bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
+ if (!is_path)
+ continue;
+ auto &data = net_data[port.second.net];
+ auto &arrival = data.max_arrival;
+ arrival = std::max(arrival, usr_arrival + comb_delay.maxDelay());
+ if (!budget_override) { // Do not increment path length if budget overridden since it doesn't
+ // require a share of the slack
+ auto &path_length = data.max_path_length;
+ path_length = std::max(path_length, net_length_plus_one);
+ }
+ }
+ }
}
- } else {
- // Default to the path ending here, if no further paths found
- value = slack / path_length;
- // Follow outputs of the user
- for (auto port : user.cell->ports) {
- if (port.second.type == PORT_OUT) {
- DelayInfo comb_delay;
- // Look up delay through this path
- bool is_path = ctx->getCellDelay(user.cell, user.port, port.first, comb_delay);
- if (is_path) {
- NetInfo *net = port.second.net;
- if (net) {
- delay_t path_budget = follow_net(net, path_length, slack - comb_delay.maxDelay());
- value = std::min(value, path_budget);
+ }
+
+ const NetInfo *crit_net = nullptr;
+
+ // Now go backwards in topological order to determine the minimum path slack, and to distribute all path slack
+ // evenly between all nets on the path
+ for (auto net : boost::adaptors::reverse(topographical_order)) {
+ auto &nd = net_data.at(net);
+ const delay_t net_length_plus_one = nd.max_path_length + 1;
+ auto &net_min_remaining_budget = nd.min_remaining_budget;
+ for (auto &usr : net->users) {
+ auto net_delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();
+ auto budget_override = ctx->getBudgetOverride(net, usr, net_delay);
+ IdString associatedClock;
+ TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, associatedClock);
+ if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
+ const auto net_arrival = nd.max_arrival;
+ auto path_budget = clk_period - (net_arrival + net_delay);
+ if (update) {
+ auto budget_share = budget_override ? 0 : path_budget / net_length_plus_one;
+ usr.budget = std::min(usr.budget, net_delay + budget_share);
+ net_min_remaining_budget = std::min(net_min_remaining_budget, path_budget - budget_share);
+ }
+
+ if (path_budget < min_slack) {
+ min_slack = path_budget;
+ if (crit_path) {
+ crit_path->clear();
+ crit_path->push_back(&usr);
+ crit_net = net;
}
}
+ if (slack_histogram) {
+ int slack_ps = ctx->getDelayNS(path_budget) * 1000;
+ (*slack_histogram)[slack_ps]++;
+ }
+ } else if (update) {
+ // Iterate over all output ports on the same cell as the sink
+ for (const auto &port : usr.cell->ports) {
+ if (port.second.type != PORT_OUT || !port.second.net)
+ continue;
+ DelayInfo comb_delay;
+ bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
+ if (!is_path)
+ continue;
+ auto path_budget = net_data.at(port.second.net).min_remaining_budget;
+ auto budget_share = budget_override ? 0 : path_budget / net_length_plus_one;
+ usr.budget = std::min(usr.budget, net_delay + budget_share);
+ net_min_remaining_budget = std::min(net_min_remaining_budget, path_budget - budget_share);
+ }
}
}
}
- return value;
- }
- delay_t walk_paths()
- {
- delay_t default_slack = delay_t(1.0e12 / ctx->target_freq);
+ if (crit_path) {
+ // Walk backwards from the most critical net
+ while (crit_net) {
+ const PortInfo *crit_ipin = nullptr;
+ delay_t max_arrival = std::numeric_limits<delay_t>::min();
- // Go through all clocked drivers and distribute the available path
- // slack evenly into the budget of every sink on the path
- for (auto &cell : ctx->cells) {
- for (auto port : cell.second->ports) {
- if (port.second.type == PORT_OUT) {
- IdString clock_domain = ctx->getPortClock(cell.second.get(), port.first);
- if (clock_domain != IdString()) {
- delay_t slack = default_slack; // TODO: clock constraints
- DelayInfo clkToQ;
- if (ctx->getCellDelay(cell.second.get(), clock_domain, port.first, clkToQ))
- slack -= clkToQ.maxDelay();
- if (port.second.net)
- follow_net(port.second.net, 0, slack);
+ // Look at all input ports on its driving cell
+ for (const auto &port : crit_net->driver.cell->ports) {
+ if (port.second.type != PORT_IN || !port.second.net)
+ continue;
+ DelayInfo comb_delay;
+ bool is_path =
+ ctx->getCellDelay(crit_net->driver.cell, port.first, crit_net->driver.port, comb_delay);
+ if (!is_path)
+ continue;
+ // If input port is influenced by a clock, skip
+ IdString portClock;
+ TimingPortClass portClass = ctx->getPortTimingClass(crit_net->driver.cell, port.first, portClock);
+ if (portClass == TMG_REGISTER_INPUT || portClass == TMG_CLOCK_INPUT || portClass == TMG_ENDPOINT ||
+ portClass == TMG_IGNORE)
+ continue;
+
+ // And find the fanin net with the latest arrival time
+ const auto net_arrival = net_data.at(port.second.net).max_arrival;
+ if (net_arrival > max_arrival) {
+ max_arrival = net_arrival;
+ crit_ipin = &port.second;
}
}
+
+ if (!crit_ipin)
+ break;
+
+ // Now convert PortInfo* into a PortRef*
+ for (auto &usr : crit_ipin->net->users) {
+ if (usr.cell->name == crit_net->driver.cell->name && usr.port == crit_ipin->name) {
+ crit_path->push_back(&usr);
+ break;
+ }
+ }
+ crit_net = crit_ipin->net;
}
+ std::reverse(crit_path->begin(), crit_path->end());
}
return min_slack;
}
@@ -141,10 +303,9 @@ struct Timing
void assign_budget()
{
// Clear delays to a very high value first
- delay_t default_slack = delay_t(1.0e12 / ctx->target_freq);
for (auto &net : ctx->nets) {
for (auto &usr : net.second->users) {
- usr.budget = default_slack;
+ usr.budget = std::numeric_limits<delay_t>::max();
}
}
@@ -180,16 +341,15 @@ void assign_budget(Context *ctx, bool quiet)
}
}
- // For slack redistribution, if user has not specified a frequency
- // dynamically adjust the target frequency to be the currently
- // achieved maximum
+ // For slack redistribution, if the user has not specified a frequency, dynamically adjust the target frequency to
+ // be the currently achieved maximum
if (ctx->auto_freq && ctx->slack_redist_iter > 0) {
- delay_t default_slack = delay_t(1.0e12 / ctx->target_freq);
- ctx->target_freq = 1e12 / (default_slack - timing.min_slack);
+ delay_t default_slack = delay_t((1.0e9 / ctx->getDelayNS(1)) / ctx->target_freq);
+ ctx->target_freq = 1.0e9 / ctx->getDelayNS(default_slack - timing.min_slack);
if (ctx->verbose)
- log_info("minimum slack for this assign = %d, target Fmax for next "
+ log_info("minimum slack for this assign = %.2f ns, target Fmax for next "
"update = %.2f MHz\n",
- timing.min_slack, ctx->target_freq / 1e6);
+ ctx->getDelayNS(timing.min_slack), ctx->target_freq / 1e6);
}
if (!quiet)
@@ -217,7 +377,9 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_path)
auto &front = crit_path.front();
auto &front_port = front->cell->ports.at(front->port);
auto &front_driver = front_port.net->driver;
- auto last_port = ctx->getPortClock(front_driver.cell, front_driver.port);
+
+ IdString last_port;
+ ctx->getPortTimingClass(front_driver.cell, front_driver.port, last_port);
for (auto sink : crit_path) {
auto sink_cell = sink->cell;
auto &port = sink_cell->ports.at(sink->port);
@@ -227,14 +389,15 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_path)
DelayInfo comb_delay;
ctx->getCellDelay(sink_cell, last_port, driver.port, comb_delay);
total += comb_delay.maxDelay();
- log_info("%4d %4d Source %s.%s\n", comb_delay.maxDelay(), total, driver_cell->name.c_str(ctx),
- driver.port.c_str(ctx));
+ log_info("%4.1f %4.1f Source %s.%s\n", ctx->getDelayNS(comb_delay.maxDelay()), ctx->getDelayNS(total),
+ driver_cell->name.c_str(ctx), driver.port.c_str(ctx));
auto net_delay = ctx->getNetinfoRouteDelay(net, *sink);
total += net_delay;
auto driver_loc = ctx->getBelLocation(driver_cell->bel);
auto sink_loc = ctx->getBelLocation(sink_cell->bel);
- log_info("%4d %4d Net %s budget %d (%d,%d) -> (%d,%d)\n", net_delay, total, net->name.c_str(ctx),
- sink->budget, driver_loc.x, driver_loc.y, sink_loc.x, sink_loc.y);
+ log_info("%4.1f %4.1f Net %s budget %f ns (%d,%d) -> (%d,%d)\n", ctx->getDelayNS(net_delay),
+ ctx->getDelayNS(total), net->name.c_str(ctx), ctx->getDelayNS(sink->budget), driver_loc.x,
+ driver_loc.y, sink_loc.x, sink_loc.y);
log_info(" Sink %s.%s\n", sink_cell->name.c_str(ctx), sink->port.c_str(ctx));
last_port = sink->port;
}
@@ -242,8 +405,8 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_path)
}
}
- delay_t default_slack = delay_t(1.0e12 / ctx->target_freq);
- log_info("estimated Fmax = %.2f MHz\n", 1e6 / (default_slack - min_slack));
+ delay_t default_slack = delay_t((1.0e9 / ctx->getDelayNS(1)) / ctx->target_freq);
+ log_info("estimated Fmax = %.2f MHz\n", 1e3 / ctx->getDelayNS(default_slack - min_slack));
if (print_histogram && slack_histogram.size() > 0) {
constexpr unsigned num_bins = 20;