aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Claire Wolf <clifford@clifford.at> 2020-04-22 16:50:45 +0200
committer GitHub <noreply@github.com> 2020-04-22 16:50:45 +0200
commit 95c74b319b36f8cb950196c3e1d10c945629c1f5 (patch)
tree b45deaf4c88b165bc6be5492bf2944fba935c2c7
parent cd82afb740fc2f1d9bead89fd2683be989acad37 (diff)
parent 93288b8eaea3e346275082352edeea5cfb4ac38a (diff)
download yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.tar.gz
yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.tar.bz2
yosys-95c74b319b36f8cb950196c3e1d10c945629c1f5.zip
Merge pull request #1979 from whitequark/cxxrtl-go-faster
cxxrtl: Gas gas gas! I'm gonna step on the gas! Tonight I'll fly!
-rw-r--r-- backends/cxxrtl/cxxrtl.cc 573
-rw-r--r-- backends/cxxrtl/cxxrtl.h 7
2 files changed, 396 insertions, 184 deletions
diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc
index ef8335e50..237700b29 100644
--- a/backends/cxxrtl/cxxrtl.cc
+++ b/backends/cxxrtl/cxxrtl.cc
@@ -171,6 +171,11 @@ struct Scheduler {
}
};
+bool is_input_wire(const RTLIL::Wire *wire) // true only for pure input ports
+{
+ return wire->port_input && !wire->port_output; // a wire flagged as both input and output does not qualify
+}
+
bool is_unary_cell(RTLIL::IdString type)
{
return type.in(
@@ -210,11 +215,54 @@ bool is_internal_cell(RTLIL::IdString type)
return type[0] == '$' && !type.begins_with("$paramod\\");
}
+bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell) // does the module instantiated by this cell carry the cxxrtl.blackbox attribute?
+{
+ RTLIL::Module *cell_module = cell->module->design->module(cell->type); // look the cell type up as a module of the same design
+ log_assert(cell_module != nullptr); // asserts that the cell type resolves to a module
+ return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
+}
+
+enum class CxxrtlPortType { // classification of a cell port, as returned by cxxrtl_port_type()
+ UNKNOWN = 0, // or mixed comb/sync
+ COMB = 1, // port wire carries only the cxxrtl.comb attribute
+ SYNC = 2, // port wire carries only the cxxrtl.sync attribute
+};
+
+CxxrtlPortType cxxrtl_port_type(const RTLIL::Cell *cell, RTLIL::IdString port) // classify `port` of `cell` from the attributes on the black box's port wire
+{
+ RTLIL::Module *cell_module = cell->module->design->module(cell->type);
+ if (cell_module == nullptr || !cell_module->get_bool_attribute(ID(cxxrtl.blackbox))) // cell type is not a module of this design, or that module is not a cxxrtl black box
+ return CxxrtlPortType::UNKNOWN;
+ RTLIL::Wire *cell_output_wire = cell_module->wire(port); // the wire named `port` inside the black box module
+ log_assert(cell_output_wire != nullptr);
+ bool is_comb = cell_output_wire->get_bool_attribute(ID(cxxrtl.comb));
+ bool is_sync = cell_output_wire->get_bool_attribute(ID(cxxrtl.sync));
+ if (is_comb && is_sync) // the two attributes are mutually exclusive; the conflict is reported through log_cmd_error
+ log_cmd_error("Port `%s.%s' is marked as both `cxxrtl.comb` and `cxxrtl.sync`.\n",
+ log_id(cell_module), log_signal(cell_output_wire));
+ else if (is_comb)
+ return CxxrtlPortType::COMB;
+ else if (is_sync)
+ return CxxrtlPortType::SYNC;
+ return CxxrtlPortType::UNKNOWN; // neither attribute is set on the port wire
+}
+
+bool is_cxxrtl_comb_port(const RTLIL::Cell *cell, RTLIL::IdString port) // convenience predicate over cxxrtl_port_type()
+{
+ return cxxrtl_port_type(cell, port) == CxxrtlPortType::COMB; // UNKNOWN (including non-black-box cells) yields false
+}
+
+bool is_cxxrtl_sync_port(const RTLIL::Cell *cell, RTLIL::IdString port) // convenience predicate over cxxrtl_port_type()
+{
+ return cxxrtl_port_type(cell, port) == CxxrtlPortType::SYNC; // UNKNOWN (including non-black-box cells) yields false
+}
+
struct FlowGraph {
struct Node {
enum class Type {
CONNECT,
- CELL,
+ CELL_SYNC, // scheduling node covering only a cell's fully sync (cxxrtl.sync) outputs
+ CELL_EVAL, // scheduling node covering a cell's inputs and all of its other outputs
PROCESS
};
@@ -225,7 +273,7 @@ struct FlowGraph {
};
std::vector<Node*> nodes;
- dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_defs, wire_uses;
+ dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses; // defs are tracked separately for comb and fully sync drivers
dict<const RTLIL::Wire*, bool> wire_def_elidable, wire_use_elidable;
~FlowGraph()
@@ -234,13 +282,17 @@ struct FlowGraph {
delete node;
}
- void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable)
+ void add_defs(Node *node, const RTLIL::SigSpec &sig, bool fully_sync, bool elidable) // fully_sync selects wire_sync_defs instead of wire_comb_defs
{
for (auto chunk : sig.chunks())
- if (chunk.wire)
- wire_defs[chunk.wire].insert(node);
- // Only defs of an entire wire in the right order can be elided.
- if (sig.is_wire())
+ if (chunk.wire) {
+ if (fully_sync)
+ wire_sync_defs[chunk.wire].insert(node);
+ else
+ wire_comb_defs[chunk.wire].insert(node);
+ }
+ // Only comb defs of an entire wire in the right order can be elided.
+ if (!fully_sync && sig.is_wire())
wire_def_elidable[sig.as_wire()] = elidable;
}
@@ -268,7 +320,7 @@ struct FlowGraph {
// Connections
void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn)
{
- add_defs(node, conn.first, /*elidable=*/true);
+ add_defs(node, conn.first, /*fully_sync=*/false, /*elidable=*/true);
add_uses(node, conn.second);
}
@@ -283,21 +335,59 @@ struct FlowGraph {
}
// Cells
- void add_cell_defs_uses(Node *node, const RTLIL::Cell *cell)
+ void add_cell_sync_defs(Node *node, const RTLIL::Cell *cell)
+ {
+ // To understand why this node type is necessary and why it produces comb defs, consider a cell
+ // with input \i and sync output \o, used in a design such that \i is connected to \o. This does
+ // not result in a feedback arc because the output is synchronous. However, a naive implementation
+ // of code generation for cells that assigns to inputs, evaluates cells, assigns from outputs
+ // would not be able to immediately converge...
+ //
+ // wire<1> i_tmp;
+ // cell->p_i = i_tmp.curr;
+ // cell->eval();
+ // i_tmp.next = cell->p_o.curr;
+ //
+ // ... since the wire connecting the input and output ports would not be localizable. To solve
+ // this, the cell is split into two scheduling nodes; one exclusively for sync outputs, and
+ // another for inputs and all non-sync outputs. This way the generated code can be rearranged...
+ //
+ // value<1> i_tmp;
+ // i_tmp = cell->p_o.curr;
+ // cell->p_i = i_tmp;
+ // cell->eval();
+ //
+ // eliminating the unnecessary delta cycle. Conceptually, the CELL_SYNC node type is a series of
+ // connections of the form `connect \lhs \cell.\sync_output`; the right-hand side of these is not
+ // expressible as a wire in RTLIL. If it was expressible, then `\cell.\sync_output` would have a sync def,
+ // and this node would be an ordinary CONNECT node, with `\lhs` having a comb def. Because it isn't,
+ // a special node type is used, the right-hand side does not appear anywhere, and the left-hand
+ // side has a comb def.
+ for (auto conn : cell->connections())
+ if (cell->output(conn.first))
+ if (is_cxxrtl_sync_port(cell, conn.first)) {
+ // See note regarding elidability below.
+ add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
+ }
+ }
+
+ void add_cell_eval_defs_uses(Node *node, const RTLIL::Cell *cell)
{
- log_assert(cell->known());
for (auto conn : cell->connections()) {
if (cell->output(conn.first)) {
- if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
- /* non-combinatorial outputs do not introduce defs */;
- else if (is_elidable_cell(cell->type))
- add_defs(node, conn.second, /*elidable=*/true);
+ if (is_elidable_cell(cell->type))
+ add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/true);
+ else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
+ add_defs(node, conn.second, /*fully_sync=*/true, /*elidable=*/false);
else if (is_internal_cell(cell->type))
- add_defs(node, conn.second, /*elidable=*/false);
- else {
- // Unlike outputs of internal cells (which generate code that depends on the ability to set the output
- // wire bits), outputs of user cells are normal wires, and the wires connected to them can be elided.
- add_defs(node, conn.second, /*elidable=*/true);
+ add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
+ else if (!is_cxxrtl_sync_port(cell, conn.first)) {
+ // Although at first it looks like outputs of user-defined cells may always be elided, the reality is
+ // more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb
+ // outputs are assigned in a different way depending on whether the cell's eval() immediately converged.
+ // Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify
+ // the infrastructure required to elide outputs of cells with many of them.
+ add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
}
}
if (cell->input(conn.first))
@@ -307,11 +397,27 @@ struct FlowGraph {
Node *add_node(const RTLIL::Cell *cell)
{
+ log_assert(cell->known());
+
+ bool has_fully_sync_outputs = false;
+ for (auto conn : cell->connections())
+ if (cell->output(conn.first) && is_cxxrtl_sync_port(cell, conn.first)) {
+ has_fully_sync_outputs = true;
+ break;
+ }
+ if (has_fully_sync_outputs) { // such cells get an extra CELL_SYNC node, pushed ahead of their CELL_EVAL node
+ Node *node = new Node;
+ node->type = Node::Type::CELL_SYNC;
+ node->cell = cell;
+ nodes.push_back(node);
+ add_cell_sync_defs(node, cell);
+ }
+
Node *node = new Node;
- node->type = Node::Type::CELL;
+ node->type = Node::Type::CELL_EVAL;
node->cell = cell;
nodes.push_back(node);
- add_cell_defs_uses(node, cell);
+ add_cell_eval_defs_uses(node, cell);
return node;
}
@@ -319,7 +425,7 @@ struct FlowGraph {
void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_)
{
for (auto &action : case_->actions) {
- add_defs(node, action.first, /*elidable=*/false);
+ add_defs(node, action.first, /*fully_sync=*/false, /*elidable=*/false);
add_uses(node, action.second);
}
for (auto sub_switch : case_->switches) {
@@ -338,9 +444,9 @@ struct FlowGraph {
for (auto sync : process->syncs)
for (auto action : sync->actions) {
if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe)
- /* sync actions do not introduce feedback */;
+ add_defs(node, action.first, /*fully_sync=*/true, /*elidable=*/false);
else
- add_defs(node, action.first, /*elidable=*/false);
+ add_defs(node, action.first, /*fully_sync=*/false, /*elidable=*/false);
add_uses(node, action.second);
}
}
@@ -356,13 +462,6 @@ struct FlowGraph {
}
};
-bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
-{
- RTLIL::Module *cell_module = cell->module->design->module(cell->type);
- log_assert(cell_module != nullptr);
- return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
-}
-
std::vector<std::string> split_by(const std::string &str, const std::string &sep)
{
std::vector<std::string> result;
@@ -414,22 +513,24 @@ struct CxxrtlWorker {
bool elide_public = false;
bool localize_internal = false;
bool localize_public = false;
- bool run_splitnets = false;
+ bool run_opt_clean_purge = false;
+ bool run_proc_flatten = false;
+ bool max_opt_level = false;
std::ostringstream f;
std::string indent;
int temporary = 0;
dict<const RTLIL::Module*, SigMap> sigmaps;
- pool<const RTLIL::Wire*> sync_wires;
- dict<RTLIL::SigBit, RTLIL::SyncType> sync_types;
+ pool<const RTLIL::Wire*> edge_wires;
+ dict<RTLIL::SigBit, RTLIL::SyncType> edge_types;
pool<const RTLIL::Memory*> writable_memories;
dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for;
- dict<const RTLIL::Cell*, dict<RTLIL::Wire*, RTLIL::IdString>> cell_wire_defs;
dict<const RTLIL::Wire*, FlowGraph::Node> elided_wires;
dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule;
pool<const RTLIL::Wire*> localized_wires;
dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations;
+ dict<const RTLIL::Module*, bool> eval_converges;
void inc_indent() {
indent += "\t";
@@ -669,18 +770,14 @@ struct CxxrtlWorker {
case FlowGraph::Node::Type::CONNECT:
dump_connect_elided(node.connect);
break;
- case FlowGraph::Node::Type::CELL:
- if (is_elidable_cell(node.cell->type)) {
- dump_cell_elided(node.cell);
- } else {
- const char *access = is_cxxrtl_blackbox_cell(node.cell) ? "->" : ".";
- f << mangle(node.cell) << access << mangle_wire_name(cell_wire_defs[node.cell][chunk.wire]) << ".curr";
- }
+ case FlowGraph::Node::Type::CELL_EVAL:
+ log_assert(is_elidable_cell(node.cell->type));
+ dump_cell_elided(node.cell);
break;
default:
log_assert(false);
}
- } else if (localized_wires[chunk.wire]) {
+ } else if (localized_wires[chunk.wire] || is_input_wire(chunk.wire)) {
f << mangle(chunk.wire);
} else {
f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr");
@@ -740,8 +837,8 @@ struct CxxrtlWorker {
case FlowGraph::Node::Type::CONNECT:
collect_connect(node.connect, cells);
break;
- case FlowGraph::Node::Type::CELL:
- collect_cell(node.cell, cells);
+ case FlowGraph::Node::Type::CELL_EVAL:
+ collect_cell_eval(node.cell, cells);
break;
default:
log_assert(false);
@@ -780,6 +877,19 @@ struct CxxrtlWorker {
f << ";\n";
}
+ void dump_cell_sync(const RTLIL::Cell *cell) // emit assignments from the cell's cxxrtl.sync output ports to the signals connected to them
+ {
+ const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : "."; // black box cells are accessed with `->`, all others with `.`
+ f << indent << "// cell " << cell->name.str() << " syncs\n";
+ for (auto conn : cell->connections())
+ if (cell->output(conn.first))
+ if (is_cxxrtl_sync_port(cell, conn.first)) { // other outputs are not handled by this function
+ f << indent;
+ dump_sigspec_lhs(conn.second);
+ f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n"; // right-hand side reads the port's .curr member
+ }
+ }
+
void dump_cell_elided(const RTLIL::Cell *cell)
{
// Unary cells
@@ -833,7 +943,7 @@ struct CxxrtlWorker {
elided_wires.count(cell->getPort(ID::Y).as_wire());
}
- void collect_cell(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
+ void collect_cell_eval(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
{
if (!is_cell_elided(cell))
return;
@@ -844,7 +954,7 @@ struct CxxrtlWorker {
collect_sigspec_rhs(port.second, cells);
}
- void dump_cell(const RTLIL::Cell *cell)
+ void dump_cell_eval(const RTLIL::Cell *cell)
{
if (is_cell_elided(cell))
return;
@@ -1088,26 +1198,69 @@ struct CxxrtlWorker {
log_assert(cell->known());
const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
for (auto conn : cell->connections())
- if (cell->input(conn.first)) {
+ if (cell->input(conn.first) && !cell->output(conn.first)) {
+ f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << " = ";
+ dump_sigspec_rhs(conn.second);
+ f << ";\n";
+ if (getenv("CXXRTL_VOID_MY_WARRANTY")) {
+ // Until we have proper clock tree detection, this really awful hack that opportunistically
+ // propagates prev_* values for clocks can be used to estimate how much faster a design could
+ // be if only one clock edge was simulated by replacing:
+ // top.p_clk = value<1>{0u}; top.step();
+ // top.p_clk = value<1>{1u}; top.step();
+ // with:
+ // top.prev_p_clk = value<1>{0u}; top.p_clk = value<1>{1u}; top.step();
+ // Don't rely on this; it will be removed without warning.
+ RTLIL::Module *cell_module = cell->module->design->module(cell->type);
+ if (cell_module != nullptr && cell_module->wire(conn.first) && conn.second.is_wire()) {
+ RTLIL::Wire *cell_module_wire = cell_module->wire(conn.first);
+ if (edge_wires[conn.second.as_wire()] && edge_wires[cell_module_wire]) {
+ f << indent << mangle(cell) << access << "prev_" << mangle(cell_module_wire) << " = ";
+ f << "prev_" << mangle(conn.second.as_wire()) << ";\n";
+ }
+ }
+ }
+ } else if (cell->input(conn.first)) {
f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << ".next = ";
dump_sigspec_rhs(conn.second);
f << ";\n";
}
- f << indent << mangle(cell) << access << "eval();\n";
- for (auto conn : cell->connections()) {
- if (conn.second.is_wire()) {
- RTLIL::Wire *wire = conn.second.as_wire();
- if (elided_wires.count(wire) && cell_wire_defs[cell].count(wire))
- continue;
- }
- if (cell->output(conn.first)) {
- if (conn.second.empty())
- continue; // ignore disconnected ports
- f << indent;
- dump_sigspec_lhs(conn.second);
- f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n";
+ auto assign_from_outputs = [&](bool cell_converged) {
+ for (auto conn : cell->connections()) {
+ if (cell->output(conn.first)) {
+ if (conn.second.empty())
+ continue; // ignore disconnected ports
+ if (is_cxxrtl_sync_port(cell, conn.first))
+ continue; // fully sync ports are handled in CELL_SYNC nodes
+ f << indent;
+ dump_sigspec_lhs(conn.second);
+ f << " = " << mangle(cell) << access << mangle_wire_name(conn.first);
+ // Similarly to how there is no purpose to buffering cell inputs, there is also no purpose to buffering
+ // combinatorial cell outputs in case the cell converges within one cycle. (To convince yourself that
+ // this optimization is valid, consider that, since the cell converged within one cycle, it would not
+ // have any buffered wires if they were not output ports. Imagine inlining the cell's eval() function,
+ // and consider the fate of the localized wires that used to be output ports.)
+ //
+ // Unlike cell inputs (which are never buffered), it is not possible to know apriori whether the cell
+ // (which may be late bound) will converge immediately. Because of this, the choice between using .curr
+ // (appropriate for buffered outputs) and .next (appropriate for unbuffered outputs) is made at runtime.
+ if (cell_converged && is_cxxrtl_comb_port(cell, conn.first))
+ f << ".next;\n";
+ else
+ f << ".curr;\n";
+ }
}
- }
+ };
+ f << indent << "if (" << mangle(cell) << access << "eval()) {\n";
+ inc_indent();
+ assign_from_outputs(/*cell_converged=*/true);
+ dec_indent();
+ f << indent << "} else {\n";
+ inc_indent();
+ f << indent << "converged = false;\n";
+ assign_from_outputs(/*cell_converged=*/false);
+ dec_indent();
+ f << indent << "}\n";
}
}
@@ -1253,21 +1406,17 @@ struct CxxrtlWorker {
}
}
- void dump_wire(const RTLIL::Wire *wire, bool is_local)
+ void dump_wire(const RTLIL::Wire *wire, bool is_local_context)
{
if (elided_wires.count(wire))
return;
+ if (localized_wires.count(wire) != is_local_context)
+ return;
- if (is_local) {
- if (!localized_wires.count(wire))
- return;
-
+ if (is_local_context) {
dump_attrs(wire);
f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n";
} else {
- if (localized_wires.count(wire))
- return;
-
std::string width;
if (wire->module->has_attribute(ID(cxxrtl.blackbox)) && wire->has_attribute(ID(cxxrtl.width))) {
width = wire->get_string_attribute(ID(cxxrtl.width));
@@ -1276,19 +1425,47 @@ struct CxxrtlWorker {
}
dump_attrs(wire);
- f << indent << "wire<" << width << "> " << mangle(wire);
+ f << indent << (is_input_wire(wire) ? "value" : "wire") << "<" << width << "> " << mangle(wire);
if (wire->has_attribute(ID::init)) {
f << " ";
dump_const_init(wire->attributes.at(ID::init));
}
f << ";\n";
- if (sync_wires[wire]) {
- for (auto sync_type : sync_types) {
- if (sync_type.first.wire == wire) {
- if (sync_type.second != RTLIL::STn)
- f << indent << "bool posedge_" << mangle(sync_type.first) << " = false;\n";
- if (sync_type.second != RTLIL::STp)
- f << indent << "bool negedge_" << mangle(sync_type.first) << " = false;\n";
+ if (edge_wires[wire]) {
+ if (is_input_wire(wire)) {
+ f << indent << "value<" << width << "> prev_" << mangle(wire);
+ if (wire->has_attribute(ID::init)) {
+ f << " ";
+ dump_const_init(wire->attributes.at(ID::init));
+ }
+ f << ";\n";
+ }
+ for (auto edge_type : edge_types) {
+ if (edge_type.first.wire == wire) {
+ std::string prev, next;
+ if (is_input_wire(wire)) {
+ prev = "prev_" + mangle(edge_type.first.wire);
+ next = mangle(edge_type.first.wire);
+ } else {
+ prev = mangle(edge_type.first.wire) + ".curr";
+ next = mangle(edge_type.first.wire) + ".next";
+ }
+ prev += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
+ next += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
+ if (edge_type.second != RTLIL::STn) {
+ f << indent << "bool posedge_" << mangle(edge_type.first) << "() const {\n";
+ inc_indent();
+ f << indent << "return !" << prev << " && " << next << ";\n";
+ dec_indent();
+ f << indent << "}\n";
+ }
+ if (edge_type.second != RTLIL::STp) {
+ f << indent << "bool negedge_" << mangle(edge_type.first) << "() const {\n";
+ inc_indent();
+ f << indent << "return " << prev << " && !" << next << ";\n";
+ dec_indent();
+ f << indent << "}\n";
+ }
}
}
}
@@ -1343,16 +1520,36 @@ struct CxxrtlWorker {
void dump_eval_method(RTLIL::Module *module)
{
inc_indent();
+ f << indent << "bool converged = " << (eval_converges.at(module) ? "true" : "false") << ";\n";
if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
+ for (auto wire : module->wires()) {
+ if (edge_wires[wire]) {
+ for (auto edge_type : edge_types) {
+ if (edge_type.first.wire == wire) {
+ if (edge_type.second != RTLIL::STn) {
+ f << indent << "bool posedge_" << mangle(edge_type.first) << " = ";
+ f << "this->posedge_" << mangle(edge_type.first) << "();\n";
+ }
+ if (edge_type.second != RTLIL::STp) {
+ f << indent << "bool negedge_" << mangle(edge_type.first) << " = ";
+ f << "this->negedge_" << mangle(edge_type.first) << "();\n";
+ }
+ }
+ }
+ }
+ }
for (auto wire : module->wires())
- dump_wire(wire, /*is_local=*/true);
+ dump_wire(wire, /*is_local_context=*/true);
for (auto node : schedule[module]) {
switch (node.type) {
case FlowGraph::Node::Type::CONNECT:
dump_connect(node.connect);
break;
- case FlowGraph::Node::Type::CELL:
- dump_cell(node.cell);
+ case FlowGraph::Node::Type::CELL_SYNC:
+ dump_cell_sync(node.cell);
+ break;
+ case FlowGraph::Node::Type::CELL_EVAL:
+ dump_cell_eval(node.cell);
break;
case FlowGraph::Node::Type::PROCESS:
dump_process(node.process);
@@ -1360,14 +1557,7 @@ struct CxxrtlWorker {
}
}
}
- for (auto sync_type : sync_types) {
- if (sync_type.first.wire->module == module) {
- if (sync_type.second != RTLIL::STn)
- f << indent << "posedge_" << mangle(sync_type.first) << " = false;\n";
- if (sync_type.second != RTLIL::STp)
- f << indent << "negedge_" << mangle(sync_type.first) << " = false;\n";
- }
- }
+ f << indent << "return converged;\n";
dec_indent();
}
@@ -1378,39 +1568,13 @@ struct CxxrtlWorker {
for (auto wire : module->wires()) {
if (elided_wires.count(wire) || localized_wires.count(wire))
continue;
- if (sync_wires[wire]) {
- std::string wire_prev = mangle(wire) + "_prev";
- std::string wire_curr = mangle(wire) + ".curr";
- std::string wire_edge = mangle(wire) + "_edge";
- f << indent << "value<" << wire->width << "> " << wire_prev << " = " << wire_curr << ";\n";
- f << indent << "if (" << mangle(wire) << ".commit()) {\n";
- inc_indent();
- f << indent << "value<" << wire->width << "> " << wire_edge << " = "
- << wire_prev << ".bit_xor(" << wire_curr << ");\n";
- for (auto sync_type : sync_types) {
- if (sync_type.first.wire != wire)
- continue;
- if (sync_type.second != RTLIL::STn) {
- f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
- << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
- inc_indent();
- f << indent << "posedge_" << mangle(sync_type.first) << " = true;\n";
- dec_indent();
- }
- if (sync_type.second != RTLIL::STp) {
- f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
- << "!" << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
- inc_indent();
- f << indent << "negedge_" << mangle(sync_type.first) << " = true;\n";
- dec_indent();
- }
- f << indent << "changed = true;\n";
- }
- dec_indent();
- f << indent << "}\n";
- } else if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0) {
- f << indent << "changed |= " << mangle(wire) << ".commit();\n";
+ if (is_input_wire(wire)) {
+ if (edge_wires[wire])
+ f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n";
+ continue;
}
+ if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0)
+ f << indent << "changed |= " << mangle(wire) << ".commit();\n";
}
if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
for (auto memory : module->memories) {
@@ -1466,10 +1630,10 @@ struct CxxrtlWorker {
inc_indent();
for (auto wire : module->wires()) {
if (wire->port_id != 0)
- dump_wire(wire, /*is_local=*/false);
+ dump_wire(wire, /*is_local_context=*/false);
}
f << "\n";
- f << indent << "void eval() override {\n";
+ f << indent << "bool eval() override {\n";
dump_eval_method(module);
f << indent << "}\n";
f << "\n";
@@ -1506,7 +1670,7 @@ struct CxxrtlWorker {
f << indent << "struct " << mangle(module) << " : public module {\n";
inc_indent();
for (auto wire : module->wires())
- dump_wire(wire, /*is_local=*/false);
+ dump_wire(wire, /*is_local_context=*/false);
f << "\n";
bool has_memories = false;
for (auto memory : module->memories) {
@@ -1537,7 +1701,7 @@ struct CxxrtlWorker {
}
if (has_cells)
f << "\n";
- f << indent << "void eval() override;\n";
+ f << indent << "bool eval() override;\n";
f << indent << "bool commit() override;\n";
dec_indent();
f << indent << "}; // struct " << mangle(module) << "\n";
@@ -1549,7 +1713,7 @@ struct CxxrtlWorker {
{
if (module->get_bool_attribute(ID(cxxrtl.blackbox)))
return;
- f << indent << "void " << mangle(module) << "::eval() {\n";
+ f << indent << "bool " << mangle(module) << "::eval() {\n";
dump_eval_method(module);
f << indent << "}\n";
f << "\n";
@@ -1638,16 +1802,18 @@ struct CxxrtlWorker {
log_assert(type == RTLIL::STp || type == RTLIL::STn || type == RTLIL::STe);
RTLIL::SigBit sigbit = signal[0];
- if (!sync_types.count(sigbit))
- sync_types[sigbit] = type;
- else if (sync_types[sigbit] != type)
- sync_types[sigbit] = RTLIL::STe;
- sync_wires.insert(signal.as_wire());
+ if (!edge_types.count(sigbit))
+ edge_types[sigbit] = type;
+ else if (edge_types[sigbit] != type)
+ edge_types[sigbit] = RTLIL::STe;
+ edge_wires.insert(signal.as_wire());
}
void analyze_design(RTLIL::Design *design)
{
bool has_feedback_arcs = false;
+ bool has_buffered_wires = false;
+
for (auto module : design->modules()) {
if (!design->selected_module(module))
continue;
@@ -1680,6 +1846,10 @@ struct CxxrtlWorker {
}
}
}
+
+ // Black boxes converge by default, since their implementations are quite unlikely to require
+ // internal propagation of comb signals.
+ eval_converges[module] = true;
continue;
}
@@ -1788,23 +1958,15 @@ struct CxxrtlWorker {
if (wire->get_bool_attribute(ID::keep)) continue;
if (wire->name.begins_with("$") && !elide_internal) continue;
if (wire->name.begins_with("\\") && !elide_public) continue;
- if (sync_wires[wire]) continue;
- log_assert(flow.wire_defs[wire].size() == 1);
- elided_wires[wire] = **flow.wire_defs[wire].begin();
+ if (edge_wires[wire]) continue;
+ log_assert(flow.wire_comb_defs[wire].size() == 1);
+ elided_wires[wire] = **flow.wire_comb_defs[wire].begin();
}
- // Elided wires that are outputs of internal cells are always connected to a well known port (Y).
- // For user cells, there could be multiple of them, and we need a way to look up the port name
- // knowing only the wire.
- for (auto cell : module->cells())
- for (auto conn : cell->connections())
- if (conn.second.is_wire() && elided_wires.count(conn.second.as_wire()))
- cell_wire_defs[cell][conn.second.as_wire()] = conn.first;
-
dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_defs;
- for (auto wire_def : flow.wire_defs)
- for (auto node : wire_def.second)
- node_defs[node].insert(wire_def.first);
+ for (auto wire_comb_def : flow.wire_comb_defs)
+ for (auto node : wire_comb_def.second)
+ node_defs[node].insert(wire_comb_def.first);
Scheduler<FlowGraph::Node> scheduler;
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_map;
@@ -1843,10 +2005,9 @@ struct CxxrtlWorker {
if (!feedback_wires.empty()) {
has_feedback_arcs = true;
- log("Module `%s' contains feedback arcs through wires:\n", module->name.c_str());
- for (auto wire : feedback_wires) {
- log(" %s\n", wire->name.c_str());
- }
+ log("Module `%s' contains feedback arcs through wires:\n", log_id(module));
+ for (auto wire : feedback_wires)
+ log(" %s\n", log_id(wire));
}
for (auto wire : module->wires()) {
@@ -1855,14 +2016,46 @@ struct CxxrtlWorker {
if (wire->get_bool_attribute(ID::keep)) continue;
if (wire->name.begins_with("$") && !localize_internal) continue;
if (wire->name.begins_with("\\") && !localize_public) continue;
- if (sync_wires[wire]) continue;
- // Outputs of FF/$memrd cells and LHS of sync actions do not end up in defs.
- if (flow.wire_defs[wire].size() != 1) continue;
+ if (edge_wires[wire]) continue;
+ if (flow.wire_sync_defs.count(wire) > 0) continue;
localized_wires.insert(wire);
}
+
+ // For maximum performance, the state of the simulation (which is the same as the set of its double buffered
+ // wires, since using a singly buffered wire for any kind of state introduces a race condition) should contain
+ // no wires attached to combinatorial outputs. Feedback wires, by definition, make that impossible. However,
+ // it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases
+ // as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs
+ // also require more than one delta cycle to converge.
+ pool<const RTLIL::Wire*> buffered_wires;
+ for (auto wire : module->wires()) {
+ if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) {
+ if (!feedback_wires[wire])
+ buffered_wires.insert(wire);
+ }
+ }
+ if (!buffered_wires.empty()) {
+ has_buffered_wires = true;
+ log("Module `%s' contains buffered combinatorial wires:\n", log_id(module));
+ for (auto wire : buffered_wires)
+ log(" %s\n", log_id(wire));
+ }
+
+ eval_converges[module] = feedback_wires.empty() && buffered_wires.empty();
}
- if (has_feedback_arcs) {
- log("Feedback arcs require delta cycles during evaluation.\n");
+ if (has_feedback_arcs || has_buffered_wires) {
+ // Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated
+ // by optimizing the design, if after `opt_clean -purge` there are any feedback wires remaining, it is very
+ // likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message.
+ const char *why_pessimistic = nullptr;
+ if (has_feedback_arcs)
+ why_pessimistic = "feedback wires";
+ else if (has_buffered_wires)
+ why_pessimistic = "buffered combinatorial wires";
+ log("\n");
+ log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic);
+ if (!max_opt_level)
+ log("Increasing the optimization level may eliminate %s from the design.\n", why_pessimistic);
}
}
@@ -1894,8 +2087,12 @@ struct CxxrtlWorker {
void prepare_design(RTLIL::Design *design)
{
bool has_sync_init, has_packed_mem;
+ log_push();
check_design(design, has_sync_init, has_packed_mem);
- if (has_sync_init) {
+ if (run_proc_flatten) {
+ Pass::call(design, "proc");
+ Pass::call(design, "flatten");
+ } else if (has_sync_init) {
// We're only interested in proc_init, but it depends on proc_prune and proc_clean, so call those
// in case they weren't already. (This allows `yosys foo.v -o foo.cc` to work.)
Pass::call(design, "proc_prune");
@@ -1908,18 +2105,15 @@ struct CxxrtlWorker {
if (has_sync_init || has_packed_mem)
check_design(design, has_sync_init, has_packed_mem);
log_assert(!(has_sync_init || has_packed_mem));
-
- if (run_splitnets) {
- Pass::call(design, "splitnets -driver");
+ if (run_opt_clean_purge)
Pass::call(design, "opt_clean -purge");
- }
- log("\n");
+ log_pop();
analyze_design(design);
}
};
struct CxxrtlBackend : public Backend {
- static const int DEFAULT_OPT_LEVEL = 5;
+ static const int DEFAULT_OPT_LEVEL = 6;
CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { }
void help() YS_OVERRIDE
@@ -1941,9 +2135,9 @@ struct CxxrtlBackend : public Backend {
log(" top.step();\n");
log(" while (1) {\n");
log(" /* user logic */\n");
- log(" top.p_clk.next = value<1> {0u};\n");
+ log(" top.p_clk = value<1> {0u};\n");
log(" top.step();\n");
- log(" top.p_clk.next = value<1> {1u};\n");
+ log(" top.p_clk = value<1> {1u};\n");
log(" top.step();\n");
log(" }\n");
log(" }\n");
@@ -1965,18 +2159,20 @@ struct CxxrtlBackend : public Backend {
log(" module debug(...);\n");
log(" (* cxxrtl.edge = \"p\" *) input clk;\n");
log(" input en;\n");
- log(" input [7:0] data;\n");
+ log(" input [7:0] i_data;\n");
+ log(" (* cxxrtl.sync *) output [7:0] o_data;\n");
log(" endmodule\n");
log("\n");
log("For this HDL interface, this backend will generate the following C++ interface:\n");
log("\n");
log(" struct bb_p_debug : public module {\n");
- log(" wire<1> p_clk;\n");
- log(" bool posedge_p_clk = false;\n");
- log(" wire<1> p_en;\n");
- log(" wire<8> p_data;\n");
+ log(" value<1> p_clk;\n");
+ log(" bool posedge_p_clk() const { /* ... */ }\n");
+ log(" value<1> p_en;\n");
+ log(" value<8> p_i_data;\n");
+ log(" wire<8> p_o_data;\n");
log("\n");
- log(" void eval() override;\n");
+ log(" bool eval() override;\n");
log(" bool commit() override;\n");
log("\n");
log(" static std::unique_ptr<bb_p_debug>\n");
@@ -1989,10 +2185,11 @@ struct CxxrtlBackend : public Backend {
log(" namespace cxxrtl_design {\n");
log("\n");
log(" struct stderr_debug : public bb_p_debug {\n");
- log(" void eval() override {\n");
- log(" if (posedge_p_clk && p_en.curr)\n");
- log(" fprintf(stderr, \"debug: %%02x\\n\", p_data.curr.data[0]);\n");
- log(" bb_p_debug::eval();\n");
+ log(" bool eval() override {\n");
+ log(" if (posedge_p_clk() && p_en)\n");
+ log(" fprintf(stderr, \"debug: %%02x\\n\", p_i_data.data[0]);\n");
+ log(" p_o_data.next = p_i_data;\n");
+ log(" return bb_p_debug::eval();\n");
log(" }\n");
log(" };\n");
log("\n");
@@ -2013,7 +2210,8 @@ struct CxxrtlBackend : public Backend {
log(" parameter WIDTH = 8;\n");
log(" (* cxxrtl.edge = \"p\" *) input clk;\n");
log(" input en;\n");
- log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] data;\n");
+ log(" (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] i_data;\n");
+ log(" (* cxxrtl.width = \"WIDTH\" *) output [WIDTH - 1:0] o_data;\n");
log(" endmodule\n");
log("\n");
log("For this parametric HDL interface, this backend will generate the following C++\n");
@@ -2022,7 +2220,8 @@ struct CxxrtlBackend : public Backend {
log(" template<size_t WIDTH>\n");
log(" struct bb_p_debug : public module {\n");
log(" // ...\n");
- log(" wire<WIDTH> p_data;\n");
+ log(" value<WIDTH> p_i_data;\n");
+ log(" wire<WIDTH> p_o_data;\n");
log(" // ...\n");
log(" static std::unique_ptr<bb_p_debug<WIDTH>>\n");
log(" create(std::string name, metadata_map parameters, metadata_map attributes);\n");
@@ -2053,10 +2252,9 @@ struct CxxrtlBackend : public Backend {
log("\n");
log(" cxxrtl.edge\n");
log(" only valid on inputs of black boxes. must be one of \"p\", \"n\", \"a\".\n");
- log(" if specified on signal `clk`, the generated code includes boolean fields\n");
- log(" `posedge_p_clk` (if \"p\"), `negedge_p_clk` (if \"n\"), or both (if \"a\"),\n");
- log(" as well as edge detection logic, simplifying implementation of clocked\n");
- log(" black boxes.\n");
+ log(" if specified on signal `clk`, the generated code includes edge detectors\n");
+ log(" `posedge_p_clk()` (if \"p\"), `negedge_p_clk()` (if \"n\"), or both (if\n");
+ log(" \"a\"), simplifying implementation of clocked black boxes.\n");
log("\n");
log(" cxxrtl.template\n");
log(" only valid on black boxes. must contain a space separated sequence of\n");
@@ -2067,6 +2265,13 @@ struct CxxrtlBackend : public Backend {
log(" only valid on ports of black boxes. must be a constant expression, which\n");
log(" is directly inserted into generated code.\n");
log("\n");
+ log(" cxxrtl.comb, cxxrtl.sync\n");
+ log(" only valid on outputs of black boxes. if specified, indicates that every\n");
+ log(" bit of the output port is driven, correspondingly, by combinatorial or\n");
+ log(" synchronous logic. this knowledge is used for scheduling optimizations.\n");
+ log(" if neither is specified, the output will be pessimistically treated as\n");
+ log(" driven by both combinatorial and synchronous logic.\n");
+ log("\n");
log("The following options are supported by this backend:\n");
log("\n");
log(" -header\n");
@@ -2100,7 +2305,10 @@ struct CxxrtlBackend : public Backend {
log(" like -O3, and localize public wires not marked (*keep*) if possible.\n");
log("\n");
log(" -O5\n");
- log(" like -O4, and run `splitnets -driver; opt_clean -purge` first.\n");
+ log(" like -O4, and run `opt_clean -purge` first.\n");
+ log("\n");
+ log(" -O6\n");
+ log(" like -O5, and run `proc; flatten` first.\n");
log("\n");
}
void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -2134,8 +2342,11 @@ struct CxxrtlBackend : public Backend {
extra_args(f, filename, args, argidx);
switch (opt_level) {
+ case 6:
+ worker.max_opt_level = true;
+ worker.run_proc_flatten = true;
case 5:
- worker.run_splitnets = true;
+ worker.run_opt_clean_purge = true;
case 4:
worker.localize_public = true;
case 3:
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index 41e6290d1..b79bbbc72 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -717,15 +717,16 @@ struct module {
module(const module &) = delete;
module &operator=(const module &) = delete;
- virtual void eval() = 0;
+ virtual bool eval() = 0;
virtual bool commit() = 0;
size_t step() {
size_t deltas = 0;
+ bool converged = false;
do {
- eval();
+ converged = eval();
deltas++;
- } while (commit());
+ } while (commit() && !converged);
return deltas;
}
};