diff options
Diffstat (limited to 'passes')
33 files changed, 3663 insertions, 540 deletions
diff --git a/passes/cmds/Makefile.inc b/passes/cmds/Makefile.inc index cf9663d1d..c7edc30fb 100644 --- a/passes/cmds/Makefile.inc +++ b/passes/cmds/Makefile.inc @@ -5,6 +5,7 @@ OBJS += passes/cmds/design.o OBJS += passes/cmds/select.o OBJS += passes/cmds/show.o OBJS += passes/cmds/rename.o +OBJS += passes/cmds/autoname.o OBJS += passes/cmds/connect.o OBJS += passes/cmds/scatter.o OBJS += passes/cmds/setundef.o diff --git a/passes/cmds/autoname.cc b/passes/cmds/autoname.cc new file mode 100644 index 000000000..4614a8153 --- /dev/null +++ b/passes/cmds/autoname.cc @@ -0,0 +1,134 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +int autoname_worker(Module *module) +{ + dict<Cell*, pair<int, IdString>> proposed_cell_names; + dict<Wire*, pair<int, IdString>> proposed_wire_names; + dict<Wire*, int> wire_score; + int best_score = -1; + + for (auto cell : module->selected_cells()) + for (auto &conn : cell->connections()) + for (auto bit : conn.second) + if (bit.wire != nullptr) + wire_score[bit.wire]++; + + for (auto cell : module->selected_cells()) { + if (cell->name[0] == '$') { + for (auto &conn : cell->connections()) { + string suffix = stringf("_%s_%s", log_id(cell->type), log_id(conn.first)); + for (auto bit : conn.second) + if (bit.wire != nullptr && bit.wire->name[0] != '$') { + IdString new_name(bit.wire->name.str() + suffix); + int score = wire_score.at(bit.wire); + if (cell->output(conn.first)) score = 0; + score = 10000*score + new_name.size(); + if (!proposed_cell_names.count(cell) || score < proposed_cell_names.at(cell).first) { + if (best_score < 0 || score < best_score) + best_score = score; + proposed_cell_names[cell] = make_pair(score, new_name); + } + } + } + } else { + for (auto &conn : cell->connections()) { + string suffix = stringf("_%s", log_id(conn.first)); + for (auto bit : conn.second) + if (bit.wire != nullptr && bit.wire->name[0] == '$') { + IdString new_name(cell->name.str() + suffix); + int score = wire_score.at(bit.wire); + if (cell->output(conn.first)) score = 0; + score = 10000*score + new_name.size(); + if (!proposed_wire_names.count(bit.wire) || score < proposed_wire_names.at(bit.wire).first) { + if (best_score < 0 || score < best_score) + best_score = score; + proposed_wire_names[bit.wire] = make_pair(score, new_name); + } + } + } + } + } + + for (auto &it : proposed_cell_names) { + if (best_score*2 < it.second.first) + continue; + IdString n = module->uniquify(it.second.second); + log_debug("Rename cell %s in %s to %s.\n", log_id(it.first), log_id(module), log_id(n)); + module->rename(it.first, n); + } + + for (auto &it : proposed_wire_names) { + if (best_score*2 < it.second.first) + continue; + IdString n = module->uniquify(it.second.second); + log_debug("Rename wire %s in %s to %s.\n", log_id(it.first), log_id(module), log_id(n)); + module->rename(it.first, n); + } + + return proposed_cell_names.size() + proposed_wire_names.size(); +} + +struct AutonamePass : public Pass { + AutonamePass() : Pass("autoname", "automatically assign names to objects") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" autoname [selection]\n"); + log("\n"); + log("Assign auto-generated public names to objects with private names (the ones\n"); + log("with $-prefix).\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + // if (args[argidx] == "-foo") { + // foo = true; + // continue; + // } + break; + } + + log_header(design, "Executing AUTONAME pass.\n"); + + for (auto module : design->selected_modules()) + { + int count = 0, iter = 0; + while (1) { + iter++; + int n = autoname_worker(module); + if (!n) break; + count += n; + } + if (count > 0) + log("Renamed %d objects in module %s (%d iterations).\n", count, log_id(module), iter); + } + } +} AutonamePass; + +PRIVATE_NAMESPACE_END diff --git a/passes/cmds/check.cc b/passes/cmds/check.cc index 64697c134..820ecac7b 100644 --- a/passes/cmds/check.cc +++ b/passes/cmds/check.cc @@ -41,14 +41,24 @@ struct CheckPass : public Pass { log("\n"); log(" - used wires that do not have a driver\n"); log("\n"); - log("When called with -noinit then this command also checks for wires which have\n"); - log("the 'init' attribute set.\n"); + log("Options:\n"); log("\n"); - log("When called with -initdrv then this command also checks for wires which have\n"); - log("the 'init' attribute set and aren't driven by a FF cell type.\n"); + log(" -noinit\n"); + log(" Also check for wires which have the 'init' attribute set.\n"); log("\n"); - log("When called with -assert then the command will produce an error if any\n"); - log("problems are found in the current design.\n"); + log(" -initdrv\n"); + log(" Also check for wires that have the 'init' attribute set and are not\n"); + log(" driven by an FF cell type.\n"); + log("\n"); + log(" -mapped\n"); + log(" Also check for internal cells that have not been mapped to cells of the\n"); + log(" target architecture.\n"); + log("\n"); + log(" -allow-tbuf\n"); + log(" Modify the -mapped behavior to still allow $_TBUF_ cells.\n"); + log("\n"); + log(" -assert\n"); + log(" Produce a runtime error if any problems are found in the current design.\n"); log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE @@ -56,6 +66,8 @@ struct CheckPass : public Pass { int counter = 0; bool noinit = false; bool initdrv = false; + bool mapped = false; + bool allow_tbuf = false; bool assert_mode = false; size_t argidx; @@ -68,6 +80,14 @@ struct CheckPass : public Pass { initdrv = true; continue; } + if (args[argidx] == "-mapped") { + mapped = true; + continue; + } + if (args[argidx] == "-allow-tbuf") { + allow_tbuf = true; + continue; + } if (args[argidx] == "-assert") { assert_mode = true; continue; @@ -135,29 +155,37 @@ struct CheckPass : public Pass { TopoSort<string> topo; for (auto cell : module->cells()) - for (auto &conn : cell->connections()) { - SigSpec sig = sigmap(conn.second); - bool logic_cell = yosys_celltypes.cell_evaluable(cell->type); - if (cell->input(conn.first)) - for (auto bit : sig) - if (bit.wire) { + { + if (mapped && cell->type.begins_with("$") && design->module(cell->type) == nullptr) { + if (allow_tbuf && cell->type == ID($_TBUF_)) goto cell_allowed; + log_warning("Cell %s.%s is an unmapped internal cell of type %s.\n", log_id(module), log_id(cell), log_id(cell->type)); + counter++; + cell_allowed:; + } + for (auto &conn : cell->connections()) { + SigSpec sig = sigmap(conn.second); + bool logic_cell = yosys_celltypes.cell_evaluable(cell->type); + if (cell->input(conn.first)) + for (auto bit : sig) + if (bit.wire) { + if (logic_cell) + topo.edge(stringf("wire %s", log_signal(bit)), + stringf("cell %s (%s)", log_id(cell), log_id(cell->type))); + used_wires.insert(bit); + } + if (cell->output(conn.first)) + for (int i = 0; i < GetSize(sig); i++) { if (logic_cell) - topo.edge(stringf("wire %s", log_signal(bit)), - stringf("cell %s (%s)", log_id(cell), log_id(cell->type))); - used_wires.insert(bit); + topo.edge(stringf("cell %s (%s)", log_id(cell), log_id(cell->type)), + stringf("wire %s", log_signal(sig[i]))); + if (sig[i].wire) + wire_drivers[sig[i]].push_back(stringf("port %s[%d] of cell %s (%s)", + log_id(conn.first), i, log_id(cell), log_id(cell->type))); } - if (cell->output(conn.first)) - for (int i = 0; i < GetSize(sig); i++) { - if (logic_cell) - topo.edge(stringf("cell %s (%s)", log_id(cell), log_id(cell->type)), - stringf("wire %s", log_signal(sig[i]))); - if (sig[i].wire) - wire_drivers[sig[i]].push_back(stringf("port %s[%d] of cell %s (%s)", - log_id(conn.first), i, log_id(cell), log_id(cell->type))); - } - if (!cell->input(conn.first) && cell->output(conn.first)) - for (auto bit : sig) - if (bit.wire) wire_drivers_count[bit]++; + if (!cell->input(conn.first) && cell->output(conn.first)) + for (auto bit : sig) + if (bit.wire) wire_drivers_count[bit]++; + } } pool<SigBit> init_bits; diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index d4c7f7953..c7e6d71a6 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -32,7 +32,8 @@ struct EquivOptPass:public ScriptPass log("\n"); log(" equiv_opt [options] [command]\n"); log("\n"); - log("This command checks circuit equivalence before and after an optimization pass.\n"); + log("This command uses temporal induction to check circuit equivalence before and\n"); + log("after an optimization pass.\n"); log("\n"); log(" -run <from_label>:<to_label>\n"); log(" only run the commands between the labels (see below). an empty\n"); @@ -49,6 +50,9 @@ struct EquivOptPass:public ScriptPass log(" -multiclock\n"); log(" run clk2fflogic before equivalence checking.\n"); log("\n"); + log(" -async2sync\n"); + log(" run async2sync before equivalence checking.\n"); + log("\n"); log(" -undef\n"); log(" enable modelling of undef states during equiv_induct.\n"); log("\n"); @@ -58,7 +62,7 @@ struct EquivOptPass:public ScriptPass } std::string command, techmap_opts; - bool assert, undef, multiclock; + bool assert, undef, multiclock, async2sync; void clear_flags() YS_OVERRIDE { @@ -67,6 +71,7 @@ struct EquivOptPass:public ScriptPass assert = false; undef = false; multiclock = false; + async2sync = false; } void execute(std::vector < std::string > args, RTLIL::Design * design) YS_OVERRIDE @@ -100,6 +105,10 @@ struct EquivOptPass:public ScriptPass multiclock = true; continue; } + if (args[argidx] == "-async2sync") { + async2sync = true; + continue; + } break; } @@ -119,6 +128,9 @@ struct EquivOptPass:public ScriptPass if (!design->full_selection()) log_cmd_error("This command only operates on fully selected designs!\n"); + if (async2sync && multiclock) + log_cmd_error("The '-async2sync' and '-multiclock' options are mutually exclusive!\n"); + log_header(design, "Executing EQUIV_OPT pass.\n"); log_push(); @@ -156,6 +168,8 @@ struct EquivOptPass:public ScriptPass if (check_label("prove")) { if (multiclock || help_mode) run("clk2fflogic", "(only with -multiclock)"); + if (async2sync || help_mode) + run("async2sync", " (only with -async2sync)"); run("equiv_make gold gate equiv"); if (help_mode) run("equiv_induct [-undef] equiv"); diff --git a/passes/fsm/fsm_detect.cc b/passes/fsm/fsm_detect.cc index 5ae991b28..fb3896669 100644 --- a/passes/fsm/fsm_detect.cc +++ b/passes/fsm/fsm_detect.cc @@ -158,22 +158,25 @@ static void detect_fsm(RTLIL::Wire *wire) std::set<sig2driver_entry_t> cellport_list; sig2user.find(sig_q, cellport_list); + auto sig_q_bits = sig_q.to_sigbit_pool(); + for (auto &cellport : cellport_list) { RTLIL::Cell *cell = cellport.first; bool set_output = false, clr_output = false; - if (cell->type == "$ne") + if (cell->type.in("$ne", "$reduce_or", "$reduce_bool")) set_output = true; - if (cell->type == "$eq") + if (cell->type.in("$eq", "$logic_not", "$reduce_and")) clr_output = true; - if (!set_output && !clr_output) { - clr_output = true; + if (set_output || clr_output) { for (auto &port_it : cell->connections()) - if (port_it.first != "\\A" || port_it.first != "\\Y") - clr_output = false; + if (cell->input(port_it.first)) + for (auto bit : assign_map(port_it.second)) + if (bit.wire != nullptr && !sig_q_bits.count(bit)) + goto next_cellport; } if (set_output || clr_output) { @@ -184,6 +187,7 @@ static void detect_fsm(RTLIL::Wire *wire) ce.set(sig, val); } } + next_cellport:; } SigSpec sig_y = sig_d, sig_undef; diff --git a/passes/memory/memory.cc b/passes/memory/memory.cc index 712bc2537..cee63bdd8 100644 --- a/passes/memory/memory.cc +++ b/passes/memory/memory.cc @@ -35,6 +35,7 @@ struct MemoryPass : public Pass { log("\n"); log("This pass calls all the other memory_* passes in a useful order:\n"); log("\n"); + log(" opt_mem\n"); log(" memory_dff [-nordff] (-memx implies -nordff)\n"); log(" opt_clean\n"); log(" memory_share\n"); @@ -81,6 +82,7 @@ struct MemoryPass : public Pass { } extra_args(args, argidx, design); + Pass::call(design, "opt_mem"); Pass::call(design, flag_nordff ? "memory_dff -nordff" : "memory_dff"); Pass::call(design, "opt_clean"); Pass::call(design, "memory_share"); diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc index eb07e9452..002c1a6a1 100644 --- a/passes/opt/Makefile.inc +++ b/passes/opt/Makefile.inc @@ -1,6 +1,7 @@ OBJS += passes/opt/opt.o OBJS += passes/opt/opt_merge.o +OBJS += passes/opt/opt_mem.o OBJS += passes/opt/opt_muxtree.o OBJS += passes/opt/opt_reduce.o OBJS += passes/opt/opt_rmdff.o diff --git a/passes/opt/opt_mem.cc b/passes/opt/opt_mem.cc new file mode 100644 index 000000000..98d3551eb --- /dev/null +++ b/passes/opt/opt_mem.cc @@ -0,0 +1,143 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +struct OptMemWorker +{ + RTLIL::Design *design; + RTLIL::Module *module; + SigMap sigmap; + bool restart; + + dict<IdString, vector<IdString>> memrd, memwr, meminit; + pool<IdString> remove_mem, remove_cells; + + OptMemWorker(RTLIL::Module *module) : design(module->design), module(module), sigmap(module), restart(false) + { + for (auto &it : module->memories) + { + memrd[it.first]; + memwr[it.first]; + meminit[it.first]; + } + + for (auto cell : module->cells()) + { + if (cell->type == ID($memrd)) { + IdString id = cell->getParam(ID(MEMID)).decode_string(); + memrd.at(id).push_back(cell->name); + } + + if (cell->type == ID($memwr)) { + IdString id = cell->getParam(ID(MEMID)).decode_string(); + memwr.at(id).push_back(cell->name); + } + + if (cell->type == ID($meminit)) { + IdString id = cell->getParam(ID(MEMID)).decode_string(); + meminit.at(id).push_back(cell->name); + } + } + } + + ~OptMemWorker() + { + for (auto it : remove_mem) + { + for (auto cell_name : memrd[it]) + module->remove(module->cell(cell_name)); + for (auto cell_name : memwr[it]) + module->remove(module->cell(cell_name)); + for (auto cell_name : meminit[it]) + module->remove(module->cell(cell_name)); + + delete module->memories.at(it); + module->memories.erase(it); + } + + for (auto cell_name : remove_cells) + module->remove(module->cell(cell_name)); + } + + int run(RTLIL::Memory *mem) + { + if (restart || remove_mem.count(mem->name)) + return 0; + + if (memwr.at(mem->name).empty() && meminit.at(mem->name).empty()) { + log("Removing memory %s.%s with no write ports or init data.\n", log_id(module), log_id(mem)); + remove_mem.insert(mem->name); + return 1; + } + + return 0; + } +}; + +struct OptMemPass : public Pass { + OptMemPass() : Pass("opt_mem", "optimize memories") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" opt_mem [options] [selection]\n"); + log("\n"); + log("This pass performs various optimizations on memories in the design.\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + log_header(design, "Executing OPT_MEM pass (optimize memories).\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + // if (args[argidx] == "-nomux") { + // mode_nomux = true; + // continue; + // } + break; + } + extra_args(args, argidx, design); + + int total_count = 0; + for (auto module : design->selected_modules()) { + while (1) { + int cnt = 0; + OptMemWorker worker(module); + for (auto &it : module->memories) + if (module->selected(it.second)) + cnt += worker.run(it.second); + if (!cnt && !worker.restart) + break; + total_count += cnt; + } + } + + if (total_count) + design->scratchpad_set_bool("opt.did_something", true); + log("Performed a total of %d transformations.\n", total_count); + } +} OptMemPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/opt/wreduce.cc b/passes/opt/wreduce.cc index c02c355cb..04b882db9 100644 --- a/passes/opt/wreduce.cc +++ b/passes/opt/wreduce.cc @@ -143,13 +143,18 @@ struct WreduceWorker SigSpec sig_d = mi.sigmap(cell->getPort(ID(D))); SigSpec sig_q = mi.sigmap(cell->getPort(ID(Q))); - Const initval; + bool is_adff = (cell->type == ID($adff)); + Const initval, arst_value; int width_before = GetSize(sig_q); if (width_before == 0) return; + if (cell->parameters.count(ID(ARST_VALUE))) { + arst_value = cell->parameters[ID(ARST_VALUE)]; + } + bool zero_ext = sig_d[GetSize(sig_d)-1] == State::S0; bool sign_ext = !zero_ext; @@ -163,7 +168,8 @@ struct WreduceWorker for (int i = GetSize(sig_q)-1; i >= 0; i--) { - if (zero_ext && sig_d[i] == State::S0 && (initval[i] == State::S0 || initval[i] == State::Sx)) { + if (zero_ext && sig_d[i] == State::S0 && (initval[i] == State::S0 || initval[i] == State::Sx) && + (!is_adff || i >= GetSize(arst_value) || arst_value[i] == State::S0 || arst_value[i] == State::Sx)) { module->connect(sig_q[i], State::S0); remove_init_bits.insert(sig_q[i]); sig_d.remove(i); @@ -171,7 +177,8 @@ struct WreduceWorker continue; } - if (sign_ext && i > 0 && sig_d[i] == sig_d[i-1] && initval[i] == initval[i-1]) { + if (sign_ext && i > 0 && sig_d[i] == sig_d[i-1] && initval[i] == initval[i-1] && + (!is_adff || i >= GetSize(arst_value) || arst_value[i] == arst_value[i-1])) { module->connect(sig_q[i], sig_q[i-1]); remove_init_bits.insert(sig_q[i]); sig_d.remove(i); @@ -214,7 +221,6 @@ struct WreduceWorker // Narrow ARST_VALUE parameter to new size. if (cell->parameters.count(ID(ARST_VALUE))) { - Const arst_value = cell->getParam(ID(ARST_VALUE)); arst_value.bits.resize(GetSize(sig_q)); cell->setParam(ID(ARST_VALUE), arst_value); } diff --git a/passes/pmgen/.gitignore b/passes/pmgen/.gitignore index 6b319b8c3..e52f3282f 100644 --- a/passes/pmgen/.gitignore +++ b/passes/pmgen/.gitignore @@ -1 +1 @@ -/*_pm.h
\ No newline at end of file +/*_pm.h diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index 98691d0fe..145d2ebf9 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -1,5 +1,5 @@ %_pm.h: passes/pmgen/pmgen.py %.pmg - $(P) mkdir -p passes/pmgen && python3 $< -o $@ -p $(subst _pm.h,,$(notdir $@)) $(filter-out $<,$^) + $(P) mkdir -p passes/pmgen && $(PYTHON_EXECUTABLE) $< -o $@ -p $(subst _pm.h,,$(notdir $@)) $(filter-out $<,$^) # -------------------------------------- @@ -21,6 +21,14 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h)) # -------------------------------------- +OBJS += passes/pmgen/xilinx_dsp.o +passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h)) +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_CREG_pm.h)) +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h)) + +# -------------------------------------- + OBJS += passes/pmgen/peepopt.o passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h)) @@ -30,7 +38,7 @@ PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg PEEPOPT_PATTERN += passes/pmgen/peepopt_dffmux.pmg passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN) - $(P) mkdir -p passes/pmgen && python3 $< -o $@ -p peepopt $(filter-out $<,$^) + $(P) mkdir -p passes/pmgen && $(PYTHON_EXECUTABLE) $< -o $@ -p peepopt $(filter-out $<,$^) # -------------------------------------- diff --git a/passes/pmgen/README.md b/passes/pmgen/README.md index 2f5b8d0b2..39560839f 100644 --- a/passes/pmgen/README.md +++ b/passes/pmgen/README.md @@ -190,7 +190,7 @@ create matches for different sections of a cell. For example: select pmux->type == $pmux slice idx GetSize(port(pmux, \S)) index <SigBit> port(pmux, \S)[idx] === port(eq, \Y) - set pmux_slice idx + set pmux_slice idx endmatch The first argument to `slice` is the local variable name used to identify the diff --git a/passes/pmgen/generate.h b/passes/pmgen/generate.h new file mode 100644 index 000000000..354583de5 --- /dev/null +++ b/passes/pmgen/generate.h @@ -0,0 +1,140 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef PMGEN_GENERATE +#define PMGEN_GENERATE + +#define GENERATE_PATTERN(pmclass, pattern) \ + generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) + +void pmtest_addports(Module *module) +{ + pool<SigBit> driven_bits, used_bits; + SigMap sigmap(module); + int icnt = 0, ocnt = 0; + + for (auto cell : module->cells()) + for (auto conn : cell->connections()) + { + if (cell->input(conn.first)) + for (auto bit : sigmap(conn.second)) + used_bits.insert(bit); + if (cell->output(conn.first)) + for (auto bit : sigmap(conn.second)) + driven_bits.insert(bit); + } + + for (auto wire : vector<Wire*>(module->wires())) + { + SigSpec ibits, obits; + for (auto bit : sigmap(wire)) { + if (!used_bits.count(bit)) + obits.append(bit); + if (!driven_bits.count(bit)) + ibits.append(bit); + } + if (!ibits.empty()) { + Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); + w->port_input = true; + module->connect(ibits, w); + } + if (!obits.empty()) { + Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); + w->port_output = true; + module->connect(w, obits); + } + } + + module->fixup_ports(); +} + +template <class pm> +void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design) +{ + log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); + + int modcnt = 0; + int maxmodcnt = 100; + int maxsubcnt = 4; + int timeout = 0; + vector<Module*> mods; + + while (modcnt < maxmodcnt) + { + int submodcnt = 0, itercnt = 0, cellcnt = 0; + Module *mod = design->addModule(NEW_ID); + + while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) + { + if (timeout++ > 10000) + log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); + + pm matcher(mod, mod->cells()); + + matcher.rng(1); + matcher.rngseed += modcnt; + matcher.rng(1); + matcher.rngseed += submodcnt; + matcher.rng(1); + matcher.rngseed += itercnt; + matcher.rng(1); + matcher.rngseed += cellcnt; + matcher.rng(1); + + if (GetSize(mod->cells()) != cellcnt) + { + bool found_match = false; + run(matcher, [&](){ found_match = true; }); + cellcnt = GetSize(mod->cells()); + + if (found_match) { + Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", + pmclass, pattern, modcnt++)); + log("Creating module %s with %d cells.\n", log_id(m), cellcnt); + mod->cloneInto(m); + pmtest_addports(m); + mods.push_back(m); + submodcnt++; + timeout = 0; + } + } + + matcher.generate_mode = true; + run(matcher, [](){}); + } + + if (submodcnt && maxsubcnt < (1 << 16)) + maxsubcnt *= 2; + + design->remove(mod); + } + + Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); + log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); + for (auto mod : mods) { + Cell *c = m->addCell(mod->name, mod->name); + for (auto port : mod->ports) { + Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); + c->setPort(port, w); + } + } + pmtest_addports(m); +} + +#endif diff --git a/passes/pmgen/ice40_dsp.cc b/passes/pmgen/ice40_dsp.cc index 16bfe537f..f60e67158 100644 --- a/passes/pmgen/ice40_dsp.cc +++ b/passes/pmgen/ice40_dsp.cc @@ -29,19 +29,19 @@ void create_ice40_dsp(ice40_dsp_pm &pm) { auto &st = pm.st_ice40_dsp; -#if 0 - log("\n"); - log("ffA: %s\n", log_id(st.ffA, "--")); - log("ffB: %s\n", log_id(st.ffB, "--")); - log("mul: %s\n", log_id(st.mul, "--")); - log("ffY: %s\n", log_id(st.ffY, "--")); - log("addAB: %s\n", log_id(st.addAB, "--")); - log("muxAB: %s\n", log_id(st.muxAB, "--")); - log("ffS: %s\n", log_id(st.ffS, "--")); -#endif - log("Checking %s.%s for iCE40 DSP inference.\n", log_id(pm.module), log_id(st.mul)); + log_debug("ffA: %s %s %s\n", log_id(st.ffA, "--"), log_id(st.ffAholdmux, "--"), log_id(st.ffArstmux, "--")); + log_debug("ffB: %s %s %s\n", log_id(st.ffB, "--"), log_id(st.ffBholdmux, "--"), log_id(st.ffBrstmux, "--")); + log_debug("ffCD: %s %s\n", log_id(st.ffCD, "--"), log_id(st.ffCDholdmux, "--")); + log_debug("mul: %s\n", log_id(st.mul, "--")); + log_debug("ffFJKG: %s\n", log_id(st.ffFJKG, "--")); + log_debug("ffH: %s\n", log_id(st.ffH, "--")); + log_debug("add: %s\n", log_id(st.add, "--")); + log_debug("mux: %s\n", log_id(st.mux, "--")); + log_debug("ffO: %s %s %s\n", log_id(st.ffO, "--"), log_id(st.ffOholdmux, "--"), log_id(st.ffOrstmux, "--")); + log_debug("\n"); + if (GetSize(st.sigA) > 16) { log(" input A (%s) is too large (%d > 16).\n", log_signal(st.sigA), GetSize(st.sigA)); return; @@ -52,59 +52,85 @@ void create_ice40_dsp(ice40_dsp_pm &pm) return; } - if (GetSize(st.sigS) > 32) { - log(" accumulator (%s) is too large (%d > 32).\n", log_signal(st.sigS), GetSize(st.sigS)); + if (GetSize(st.sigO) > 33) { + log(" adder/accumulator (%s) is too large (%d > 33).\n", log_signal(st.sigO), GetSize(st.sigO)); return; } - if (GetSize(st.sigY) > 32) { - log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigY), GetSize(st.sigY)); + if (GetSize(st.sigH) > 32) { + log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigH), GetSize(st.sigH)); return; } - bool mul_signed = st.mul->getParam("\\A_SIGNED").as_bool(); + Cell *cell = st.mul; + if (cell->type == ID($mul)) { + log(" replacing %s with SB_MAC16 cell.\n", log_id(st.mul->type)); - log(" replacing $mul with SB_MAC16 cell.\n"); - - Cell *cell = pm.module->addCell(NEW_ID, "\\SB_MAC16"); - pm.module->swap_names(cell, st.mul); + cell = pm.module->addCell(NEW_ID, ID(SB_MAC16)); + pm.module->swap_names(cell, st.mul); + } + else log_assert(cell->type == ID(SB_MAC16)); // SB_MAC16 Input Interface - SigSpec A = st.sigA; - A.extend_u0(16, mul_signed); + A.extend_u0(16, st.mul->getParam(ID(A_SIGNED)).as_bool()); + log_assert(GetSize(A) == 16); SigSpec B = st.sigB; - B.extend_u0(16, mul_signed); - - SigSpec CD; - if (st.muxA) - CD = st.muxA->getPort("\\B"); - if (st.muxB) - CD = st.muxB->getPort("\\A"); - CD.extend_u0(32, mul_signed); + B.extend_u0(16, st.mul->getParam(ID(B_SIGNED)).as_bool()); + log_assert(GetSize(B) == 16); - cell->setPort("\\A", A); - cell->setPort("\\B", B); - cell->setPort("\\C", CD.extract(0, 16)); - cell->setPort("\\D", CD.extract(16, 16)); + SigSpec CD = st.sigCD; + if (CD.empty()) + CD = RTLIL::Const(0, 32); + else + log_assert(GetSize(CD) == 32); - cell->setParam("\\A_REG", st.ffA ? State::S1 : State::S0); - cell->setParam("\\B_REG", st.ffB ? State::S1 : State::S0); + cell->setPort(ID::A, A); + cell->setPort(ID::B, B); + cell->setPort(ID(C), CD.extract(16, 16)); + cell->setPort(ID(D), CD.extract(0, 16)); - cell->setPort("\\AHOLD", State::S0); - cell->setPort("\\BHOLD", State::S0); - cell->setPort("\\CHOLD", State::S0); - cell->setPort("\\DHOLD", State::S0); + cell->setParam(ID(A_REG), st.ffA ? State::S1 : State::S0); + cell->setParam(ID(B_REG), st.ffB ? State::S1 : State::S0); + cell->setParam(ID(C_REG), st.ffCD ? State::S1 : State::S0); + cell->setParam(ID(D_REG), st.ffCD ? State::S1 : State::S0); - cell->setPort("\\IRSTTOP", State::S0); - cell->setPort("\\IRSTBOT", State::S0); + SigSpec AHOLD, BHOLD, CDHOLD; + if (st.ffAholdmux) + AHOLD = st.ffAholdpol ? st.ffAholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffAholdmux->getPort(ID(S))); + else + AHOLD = State::S0; + if (st.ffBholdmux) + BHOLD = st.ffBholdpol ? st.ffBholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffBholdmux->getPort(ID(S))); + else + BHOLD = State::S0; + if (st.ffCDholdmux) + CDHOLD = st.ffCDholdpol ? st.ffCDholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffCDholdmux->getPort(ID(S))); + else + CDHOLD = State::S0; + cell->setPort(ID(AHOLD), AHOLD); + cell->setPort(ID(BHOLD), BHOLD); + cell->setPort(ID(CHOLD), CDHOLD); + cell->setPort(ID(DHOLD), CDHOLD); + + SigSpec IRSTTOP, IRSTBOT; + if (st.ffArstmux) + IRSTTOP = st.ffArstpol ? st.ffArstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffArstmux->getPort(ID(S))); + else + IRSTTOP = State::S0; + if (st.ffBrstmux) + IRSTBOT = st.ffBrstpol ? st.ffBrstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffBrstmux->getPort(ID(S))); + else + IRSTBOT = State::S0; + cell->setPort(ID(IRSTTOP), IRSTTOP); + cell->setPort(ID(IRSTBOT), IRSTBOT); - if (st.clock_vld) + if (st.clock != SigBit()) { - cell->setPort("\\CLK", st.clock); - cell->setPort("\\CE", State::S1); - cell->setParam("\\NEG_TRIGGER", st.clock_pol ? State::S0 : State::S1); + cell->setPort(ID(CLK), st.clock); + cell->setPort(ID(CE), State::S1); + cell->setParam(ID(NEG_TRIGGER), st.clock_pol ? State::S0 : State::S1); log(" clock: %s (%s)", log_signal(st.clock), st.clock_pol ? "posedge" : "negedge"); @@ -114,91 +140,137 @@ void create_ice40_dsp(ice40_dsp_pm &pm) if (st.ffB) log(" ffB:%s", log_id(st.ffB)); - if (st.ffY) - log(" ffY:%s", log_id(st.ffY)); + if (st.ffCD) + log(" ffCD:%s", log_id(st.ffCD)); + + if (st.ffFJKG) + log(" ffFJKG:%s", log_id(st.ffFJKG)); + + if (st.ffH) + log(" ffH:%s", log_id(st.ffH)); - if (st.ffS) - log(" ffS:%s", log_id(st.ffS)); + if (st.ffO) + log(" ffO:%s", log_id(st.ffO)); log("\n"); } else { - cell->setPort("\\CLK", State::S0); - cell->setPort("\\CE", State::S0); - cell->setParam("\\NEG_TRIGGER", State::S0); + cell->setPort(ID(CLK), State::S0); + cell->setPort(ID(CE), State::S0); + cell->setParam(ID(NEG_TRIGGER), State::S0); } // SB_MAC16 Cascade Interface - cell->setPort("\\SIGNEXTIN", State::Sx); - cell->setPort("\\SIGNEXTOUT", pm.module->addWire(NEW_ID)); + cell->setPort(ID(SIGNEXTIN), State::Sx); + cell->setPort(ID(SIGNEXTOUT), pm.module->addWire(NEW_ID)); - cell->setPort("\\CI", State::Sx); - cell->setPort("\\CO", pm.module->addWire(NEW_ID)); + cell->setPort(ID(CI), State::Sx); - cell->setPort("\\ACCUMCI", State::Sx); - cell->setPort("\\ACCUMCO", pm.module->addWire(NEW_ID)); + cell->setPort(ID(ACCUMCI), State::Sx); + cell->setPort(ID(ACCUMCO), pm.module->addWire(NEW_ID)); // SB_MAC16 Output Interface - SigSpec O = st.ffS ? st.sigS : st.sigY; + SigSpec O = st.sigO; + int O_width = GetSize(O); + if (O_width == 33) { + log_assert(st.add); + // If we have a signed multiply-add, then perform sign extension + if (st.add->getParam(ID(A_SIGNED)).as_bool() && st.add->getParam(ID(B_SIGNED)).as_bool()) + pm.module->connect(O[32], O[31]); + else + cell->setPort(ID(CO), O[32]); + O.remove(O_width-1); + } + else + cell->setPort(ID(CO), pm.module->addWire(NEW_ID)); + log_assert(GetSize(O) <= 32); if (GetSize(O) < 32) O.append(pm.module->addWire(NEW_ID, 32-GetSize(O))); - cell->setPort("\\O", O); - - if (st.addAB) { - log(" accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type)); - cell->setPort("\\ADDSUBTOP", st.addAB->type == "$add" ? State::S0 : State::S1); - cell->setPort("\\ADDSUBBOT", st.addAB->type == "$add" ? State::S0 : State::S1); + cell->setPort(ID(O), O); + + bool accum = false; + if (st.add) { + accum = (st.ffO && st.add->getPort(st.addAB == ID::A ? ID::B : ID::A) == st.sigO); + if (accum) + log(" accumulator %s (%s)\n", log_id(st.add), log_id(st.add->type)); + else + log(" adder %s (%s)\n", log_id(st.add), log_id(st.add->type)); + cell->setPort(ID(ADDSUBTOP), st.add->type == ID($add) ? State::S0 : State::S1); + cell->setPort(ID(ADDSUBBOT), st.add->type == ID($add) ? State::S0 : State::S1); } else { - cell->setPort("\\ADDSUBTOP", State::S0); - cell->setPort("\\ADDSUBBOT", State::S0); + cell->setPort(ID(ADDSUBTOP), State::S0); + cell->setPort(ID(ADDSUBBOT), State::S0); } - cell->setPort("\\ORSTTOP", State::S0); - cell->setPort("\\ORSTBOT", State::S0); + SigSpec OHOLD; + if (st.ffOholdmux) + OHOLD = st.ffOholdpol ? st.ffOholdmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffOholdmux->getPort(ID(S))); + else + OHOLD = State::S0; + cell->setPort(ID(OHOLDTOP), OHOLD); + cell->setPort(ID(OHOLDBOT), OHOLD); - cell->setPort("\\OHOLDTOP", State::S0); - cell->setPort("\\OHOLDBOT", State::S0); + SigSpec ORST; + if (st.ffOrstmux) + ORST = st.ffOrstpol ? st.ffOrstmux->getPort(ID(S)) : pm.module->Not(NEW_ID, st.ffOrstmux->getPort(ID(S))); + else + ORST = State::S0; + cell->setPort(ID(ORSTTOP), ORST); + cell->setPort(ID(ORSTBOT), ORST); SigSpec acc_reset = State::S0; - if (st.muxA) - acc_reset = st.muxA->getPort("\\S"); - if (st.muxB) - acc_reset = pm.module->Not(NEW_ID, st.muxB->getPort("\\S")); - - cell->setPort("\\OLOADTOP", acc_reset); - cell->setPort("\\OLOADBOT", acc_reset); + if (st.mux) { + if (st.muxAB == ID::A) + acc_reset = st.mux->getPort(ID(S)); + else + acc_reset = pm.module->Not(NEW_ID, st.mux->getPort(ID(S))); + } + cell->setPort(ID(OLOADTOP), acc_reset); + cell->setPort(ID(OLOADBOT), acc_reset); // SB_MAC16 Remaining Parameters - cell->setParam("\\C_REG", State::S0); - cell->setParam("\\D_REG", State::S0); + cell->setParam(ID(TOP_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0); + cell->setParam(ID(BOT_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0); + cell->setParam(ID(PIPELINE_16x16_MULT_REG1), st.ffFJKG ? State::S1 : State::S0); + cell->setParam(ID(PIPELINE_16x16_MULT_REG2), st.ffH ? State::S1 : State::S0); - cell->setParam("\\TOP_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\BOT_8x8_MULT_REG", st.ffY ? State::S1 : State::S0); - cell->setParam("\\PIPELINE_16x16_MULT_REG1", st.ffY ? State::S1 : State::S0); - cell->setParam("\\PIPELINE_16x16_MULT_REG2", State::S0); + cell->setParam(ID(TOPADDSUB_LOWERINPUT), Const(2, 2)); + cell->setParam(ID(TOPADDSUB_UPPERINPUT), accum ? State::S0 : State::S1); + cell->setParam(ID(TOPADDSUB_CARRYSELECT), Const(3, 2)); - cell->setParam("\\TOPOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); - cell->setParam("\\TOPADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\TOPADDSUB_UPPERINPUT", State::S0); - cell->setParam("\\TOPADDSUB_CARRYSELECT", Const(3, 2)); + cell->setParam(ID(BOTADDSUB_LOWERINPUT), Const(2, 2)); + cell->setParam(ID(BOTADDSUB_UPPERINPUT), accum ? State::S0 : State::S1); + cell->setParam(ID(BOTADDSUB_CARRYSELECT), Const(0, 2)); - cell->setParam("\\BOTOUTPUT_SELECT", Const(st.ffS ? 1 : 3, 2)); - cell->setParam("\\BOTADDSUB_LOWERINPUT", Const(2, 2)); - cell->setParam("\\BOTADDSUB_UPPERINPUT", State::S0); - cell->setParam("\\BOTADDSUB_CARRYSELECT", Const(0, 2)); + cell->setParam(ID(MODE_8x8), State::S0); + cell->setParam(ID(A_SIGNED), st.mul->getParam(ID(A_SIGNED)).as_bool()); + cell->setParam(ID(B_SIGNED), st.mul->getParam(ID(B_SIGNED)).as_bool()); - cell->setParam("\\MODE_8x8", State::S0); - cell->setParam("\\A_SIGNED", mul_signed ? State::S1 : State::S0); - cell->setParam("\\B_SIGNED", mul_signed ? State::S1 : State::S0); + if (st.ffO) { + if (st.o_lo) + cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2)); + else + cell->setParam(ID(TOPOUTPUT_SELECT), Const(1, 2)); - pm.autoremove(st.mul); - pm.autoremove(st.ffY); - pm.autoremove(st.ffS); + st.ffO->connections_.at(ID(Q)).replace(O, pm.module->addWire(NEW_ID, GetSize(O))); + cell->setParam(ID(BOTOUTPUT_SELECT), Const(1, 2)); + } + else { + cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2)); + cell->setParam(ID(BOTOUTPUT_SELECT), Const(st.add ? 0 : 3, 2)); + } + + if (cell != st.mul) + pm.autoremove(st.mul); + else + pm.blacklist(st.mul); + pm.autoremove(st.ffFJKG); + pm.autoremove(st.add); } struct Ice40DspPass : public Pass { @@ -209,7 +281,17 @@ struct Ice40DspPass : public Pass { log("\n"); log(" ice40_dsp [options] [selection]\n"); log("\n"); - log("Map multipliers and multiply-accumulate blocks to iCE40 DSP resources.\n"); + log("Map multipliers ($mul/SB_MAC16) and multiply-accumulate ($mul/SB_MAC16 + $add)\n"); + log("cells into iCE40 DSP resources.\n"); + log("Currently, only the 16x16 multiply mode is supported and not the 2 x 8x8 mode.\n"); + log("\n"); + log("Pack input registers (A, B, {C,D}; with optional hold), pipeline registers\n"); + log("({F,J,K,G}, H), output registers (O -- full 32-bits or lower 16-bits only; with\n"); + log("optional hold), and post-adder into into the SB_MAC16 resource.\n"); + log("\n"); + log("Multiply-accumulate operations using the post-adder with feedback on the {C,D}\n"); + log("input will be folded into the DSP. In this scenario only, resetting the\n"); + log("the accumulator to an arbitrary value can be inferred to use the {C,D} input.\n"); log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE diff --git a/passes/pmgen/ice40_dsp.pmg b/passes/pmgen/ice40_dsp.pmg index 7003092bb..6b6d2b56f 100644 --- a/passes/pmgen/ice40_dsp.pmg +++ b/passes/pmgen/ice40_dsp.pmg @@ -1,163 +1,574 @@ pattern ice40_dsp state <SigBit> clock -state <bool> clock_pol clock_vld -state <SigSpec> sigA sigB sigY sigS -state <Cell*> addAB muxAB +state <bool> clock_pol cd_signed o_lo +state <SigSpec> sigA sigB sigCD sigH sigO +state <Cell*> add mux +state <IdString> addAB muxAB + +state <bool> ffAholdpol ffBholdpol ffCDholdpol ffOholdpol +state <bool> ffArstpol ffBrstpol ffCDrstpol ffOrstpol + +state <Cell*> ffA ffAholdmux ffArstmux ffB ffBholdmux ffBrstmux ffCD ffCDholdmux +state <Cell*> ffFJKG ffH ffO ffOholdmux ffOrstmux + +// subpattern +state <SigSpec> argQ argD +state <bool> ffholdpol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffholdmux dffrstmux +udata <bool> dffholdpol dffrstpol dffclock_pol match mul - select mul->type.in($mul) + select mul->type.in($mul, \SB_MAC16) select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10 - select GetSize(mul->getPort(\Y)) > 10 endmatch -match ffA - select ffA->type.in($dff) - // select nusers(port(ffA, \Q)) == 2 - index <SigSpec> port(ffA, \Q) === port(mul, \A) - optional -endmatch +code sigA sigB sigH + auto unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigA = unextend(port(mul, \A)); + sigB = unextend(port(mul, \B)); -code sigA clock clock_pol clock_vld - sigA = port(mul, \A); + SigSpec O; + if (mul->type == $mul) + O = mul->getPort(\Y); + else if (mul->type == \SB_MAC16) + O = mul->getPort(\O); + else log_abort(); + if (GetSize(O) <= 10) + reject; - if (ffA) { - sigA = port(ffA, \D); + // Only care about those bits that are used + int i; + for (i = 0; i < GetSize(O); i++) { + if (nusers(O[i]) <= 1) + break; + sigH.append(O[i]); + } + log_assert(nusers(O.extract_end(i)) <= 1); +endcode - clock = port(ffA, \CLK).as_bit(); - clock_pol = param(ffA, \CLK_POLARITY).as_bool(); - clock_vld = true; +code argQ ffA ffAholdmux ffArstmux ffAholdpol ffArstpol sigA clock clock_pol + if (mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffA = dff; + clock = dffclock; + clock_pol = dffclock_pol; + if (dffrstmux) { + ffArstmux = dffrstmux; + ffArstpol = dffrstpol; + } + if (dffholdmux) { + ffAholdmux = dffholdmux; + ffAholdpol = dffholdpol; + } + sigA = dffD; + } } endcode -match ffB - select ffB->type.in($dff) - // select nusers(port(ffB, \Q)) == 2 - index <SigSpec> port(ffB, \Q) === port(mul, \B) - optional -endmatch +code argQ ffB ffBholdmux ffBrstmux ffBholdpol ffBrstpol sigB clock clock_pol + if (mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + ffB = dff; + clock = dffclock; + clock_pol = dffclock_pol; + if (dffrstmux) { + ffBrstmux = dffrstmux; + ffBrstpol = dffrstpol; + } + if (dffholdmux) { + ffBholdmux = dffholdmux; + ffBholdpol = dffholdpol; + } + sigB = dffD; + } + } +endcode -code sigB clock clock_pol clock_vld - sigB = port(mul, \B); +code argD ffFJKG sigH clock clock_pol + if (nusers(sigH) == 2 && + (mul->type != \SB_MAC16 || + (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool()))) { + argD = sigH; + subpattern(out_dffe); + if (dff) { + // F/J/K/G do not have a CE-like (hold) input + if (dffholdmux) + goto reject_ffFJKG; - if (ffB) { - sigB = port(ffB, \D); - SigBit c = port(ffB, \CLK).as_bit(); - bool cp = param(ffB, \CLK_POLARITY).as_bool(); + // Reset signal of F/J (IRSTTOP) and K/G (IRSTBOT) + // shared with A and B + if ((ffArstmux != NULL) != (dffrstmux != NULL)) + goto reject_ffFJKG; + if ((ffBrstmux != NULL) != (dffrstmux != NULL)) + goto reject_ffFJKG; + if (ffArstmux) { + if (port(ffArstmux, \S) != port(dffrstmux, \S)) + goto reject_ffFJKG; + if (ffArstpol != dffrstpol) + goto reject_ffFJKG; + } + if (ffBrstmux) { + if (port(ffBrstmux, \S) != port(dffrstmux, \S)) + goto reject_ffFJKG; + if (ffBrstpol != dffrstpol) + goto reject_ffFJKG; + } - if (clock_vld && (c != clock || cp != clock_pol)) - reject; + ffFJKG = dff; + clock = dffclock; + clock_pol = dffclock_pol; + sigH = dffQ; + +reject_ffFJKG: ; + } + } +endcode + +code argD ffH sigH sigO clock clock_pol + if (ffFJKG && nusers(sigH) == 2 && + (mul->type != \SB_MAC16 || !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool())) { + argD = sigH; + subpattern(out_dffe); + if (dff) { + // H does not have a CE-like (hold) input + if (dffholdmux) + goto reject_ffH; + + // Reset signal of H (IRSTBOT) shared with B + if ((ffBrstmux != NULL) != (dffrstmux != NULL)) + goto reject_ffH; + if (ffBrstmux) { + if (port(ffBrstmux, \S) != port(dffrstmux, \S)) + goto reject_ffH; + if (ffBrstpol != dffrstpol) + goto reject_ffH; + } - clock = c; - clock_pol = cp; - clock_vld = true; + ffH = dff; + clock = dffclock; + clock_pol = dffclock_pol; + sigH = dffQ; + +reject_ffH: ; + } } + + sigO = sigH; endcode -match ffY - select ffY->type.in($dff) - select nusers(port(ffY, \D)) == 2 - index <SigSpec> port(ffY, \D) === port(mul, \Y) +match add + if mul->type != \SB_MAC16 || (param(mul, \TOPOUTPUT_SELECT).as_int() == 3 && param(mul, \BOTOUTPUT_SELECT).as_int() == 3) + + select add->type.in($add) + choice <IdString> AB {\A, \B} + select nusers(port(add, AB)) == 2 + + index <SigBit> port(add, AB)[0] === sigH[0] + filter GetSize(port(add, AB)) <= GetSize(sigH) + filter port(add, AB) == sigH.extract(0, GetSize(port(add, AB))) + filter nusers(sigH.extract_end(GetSize(port(add, AB)))) <= 1 + set addAB AB optional endmatch -code sigY clock clock_pol clock_vld - sigY = port(mul, \Y); +code sigCD sigO cd_signed + if (add) { + sigCD = port(add, addAB == \A ? \B : \A); + cd_signed = param(add, addAB == \A ? \B_SIGNED : \A_SIGNED).as_bool(); - if (ffY) { - sigY = port(ffY, \Q); - SigBit c = port(ffY, \CLK).as_bit(); - bool cp = param(ffY, \CLK_POLARITY).as_bool(); + int natural_mul_width = GetSize(sigA) + GetSize(sigB); + int actual_mul_width = GetSize(sigH); + int actual_acc_width = GetSize(sigCD); - if (clock_vld && (c != clock || cp != clock_pol)) + if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) + reject; + // If accumulator, check adder width and signedness + if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(add, \A_SIGNED).as_bool())) reject; - clock = c; - clock_pol = cp; - clock_vld = true; + sigO = port(add, \Y); } endcode -match addA - select addA->type.in($add) - select nusers(port(addA, \A)) == 2 - index <SigSpec> port(addA, \A) === sigY +match mux + select mux->type == $mux + choice <IdString> AB {\A, \B} + select nusers(port(mux, AB)) == 2 + index <SigSpec> port(mux, AB) === sigO + set muxAB AB optional endmatch -match addB - if !addA - select addB->type.in($add, $sub) - select nusers(port(addB, \B)) == 2 - index <SigSpec> port(addB, \B) === sigY - optional -endmatch +code sigO + if (mux) + sigO = port(mux, \Y); +endcode + +code argD ffO ffOholdmux ffOrstmux ffOholdpol ffOrstpol sigO sigCD clock clock_pol cd_signed o_lo + if (mul->type != \SB_MAC16 || + // Ensure that register is not already used + ((param(mul, \TOPOUTPUT_SELECT, 0).as_int() != 1 && param(mul, \BOTOUTPUT_SELECT, 0).as_int() != 1) && + // Ensure that OLOADTOP/OLOADBOT is unused or zero + (port(mul, \OLOADTOP, State::S0).is_fully_zero() && port(mul, \OLOADBOT, State::S0).is_fully_zero()))) { + + dff = nullptr; + + // First try entire sigO + if (nusers(sigO) == 2) { + argD = sigO; + subpattern(out_dffe); + } + + // Otherwise try just its least significant 16 bits + if (!dff && GetSize(sigO) > 16) { + argD = sigO.extract(0, 16); + if (nusers(argD) == 2) { + subpattern(out_dffe); + o_lo = dff; + } + } + + if (dff) { + ffO = dff; + clock = dffclock; + clock_pol = dffclock_pol; + if (dffrstmux) { + ffOrstmux = dffrstmux; + ffOrstpol = dffrstpol; + } + if (dffholdmux) { + ffOholdmux = dffholdmux; + ffOholdpol = dffholdpol; + } + + sigO.replace(sigO.extract(0, GetSize(dffQ)), dffQ); + } -code addAB sigS - if (addA) { - addAB = addA; - sigS = port(addA, \B); + // Loading value into output register is not + // supported unless using accumulator + if (mux) { + if (sigCD != sigO) + reject; + sigCD = port(mux, muxAB == \B ? \A : \B); + + cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool(); + } } - if (addB) { - addAB = addB; - sigS = port(addB, \A); +endcode + +code argQ ffCD ffCDholdmux ffCDholdpol ffCDrstpol sigCD clock clock_pol + if (!sigCD.empty() && sigCD != sigO && + (mul->type != \SB_MAC16 || (!param(mul, \C_REG).as_bool() && !param(mul, \D_REG).as_bool()))) { + argQ = sigCD; + subpattern(in_dffe); + if (dff) { + if (dffholdmux) { + ffCDholdmux = dffholdmux; + ffCDholdpol = dffholdpol; + } + + // Reset signal of C (IRSTTOP) and D (IRSTBOT) + // shared with A and B + if ((ffArstmux != NULL) != (dffrstmux != NULL)) + goto reject_ffCD; + if ((ffBrstmux != NULL) != (dffrstmux != NULL)) + goto reject_ffCD; + if (ffArstmux) { + if (port(ffArstmux, \S) != port(dffrstmux, \S)) + goto reject_ffCD; + if (ffArstpol != dffrstpol) + goto reject_ffCD; + } + if (ffBrstmux) { + if (port(ffBrstmux, \S) != port(dffrstmux, \S)) + goto reject_ffCD; + if (ffBrstpol != dffrstpol) + goto reject_ffCD; + } + + ffCD = dff; + clock = dffclock; + clock_pol = dffclock_pol; + sigCD = dffD; + +reject_ffCD: ; + } } - if (addAB) { - int natural_mul_width = GetSize(sigA) + GetSize(sigB); - int actual_mul_width = GetSize(sigY); - int actual_acc_width = GetSize(sigS); +endcode - if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width)) +code sigCD + sigCD.extend_u0(32, cd_signed); +endcode + +code + accept; +endcode + +// ####################### + +subpattern in_dffe +arg argD argQ clock clock_pol + +code + dff = nullptr; + for (auto c : argQ.chunks()) { + if (!c.wire) + reject; + if (c.wire->get_bool_attribute(\keep)) reject; - if ((actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool())) + Const init = c.wire->attributes.at(\init, State::Sx); + if (!init.is_fully_undef() && !init.is_fully_zero()) reject; } endcode -match muxA - if addAB - select muxA->type.in($mux) - select nusers(port(muxA, \A)) == 2 - index <SigSpec> port(muxA, \A) === port(addAB, \Y) - optional +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \Q)[offset] === argQ[0] + + // Check that the rest of argQ is present + filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + set ffoffset offset endmatch -match muxB - if addAB - if !muxA - select muxB->type.in($mux) - select nusers(port(muxB, \B)) == 2 - index <SigSpec> port(muxB, \B) === port(addAB, \Y) - optional +code argQ argD +{ + if (clock != SigBit()) { + if (port(ff, \CLK) != clock) + reject; + if (param(ff, \CLK_POLARITY).as_bool() != clock_pol) + reject; + } + + SigSpec Q = port(ff, \Q); + dff = ff; + dffclock = port(ff, \CLK); + dffclock_pol = param(ff, \CLK_POLARITY).as_bool(); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); +} +endcode + +match ffrstmux + if false /* TODO: ice40 resets are actually async */ + + if !argD.empty() + select ffrstmux->type.in($mux) + index <SigSpec> port(ffrstmux, \Y) === argD + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <bool> pol (BA == \B) + set ffrstpol pol + semioptional endmatch -code muxAB - muxAB = addAB; - if (muxA) - muxAB = muxA; - if (muxB) - muxAB = muxB; +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffholdmux if argQ has at + // least 3 users (ff, <upstream>, ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; endcode -match ffS - if muxAB - select ffS->type.in($dff) - select nusers(port(ffS, \D)) == 2 - index <SigSpec> port(ffS, \D) === port(muxAB, \Y) - index <SigSpec> port(ffS, \Q) === sigS +match ffholdmux + if !argD.empty() + select ffholdmux->type.in($mux) + index <SigSpec> port(ffholdmux, \Y) === argD + choice <IdString> BA {\B, \A} + index <SigSpec> port(ffholdmux, BA) === argQ + define <bool> pol (BA == \B) + set ffholdpol pol + semioptional endmatch -code clock clock_pol clock_vld - if (ffS) { - SigBit c = port(ffS, \CLK).as_bit(); - bool cp = param(ffS, \CLK_POLARITY).as_bool(); +code argD + if (ffholdmux) { + dffholdmux = ffholdmux; + dffholdpol = ffholdpol; + argD = port(ffholdmux, ffholdpol ? \A : \B); + dffD.replace(port(ffholdmux, \Y), argD); + } + else + dffholdmux = nullptr; +endcode - if (clock_vld && (c != clock || cp != clock_pol)) +// ####################### + +subpattern out_dffe +arg argD argQ clock clock_pol + +code + dff = nullptr; + for (auto c : argD.chunks()) + if (c.wire->get_bool_attribute(\keep)) reject; +endcode - clock = c; - clock_pol = cp; - clock_vld = true; +match ffholdmux + select ffholdmux->type.in($mux) + // ffholdmux output must have two users: ffholdmux and ff.D + select nusers(port(ffholdmux, \Y)) == 2 + + choice <IdString> BA {\B, \A} + // keep-last-value net must have at least three users: ffholdmux, ff, downstream sink(s) + select nusers(port(ffholdmux, BA)) >= 3 + + slice offset GetSize(port(ffholdmux, \Y)) + define <IdString> AB (BA == \B ? \A : \B) + index <SigBit> port(ffholdmux, AB)[offset] === argD[0] + + // Check that the rest of argD is present + filter GetSize(port(ffholdmux, AB)) >= offset + GetSize(argD) + filter port(ffholdmux, AB).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define <bool> pol (BA == \B) + set ffholdpol pol + + semioptional +endmatch + +code argD argQ + dffholdmux = ffholdmux; + if (ffholdmux) { + SigSpec AB = port(ffholdmux, ffholdpol ? \A : \B); + SigSpec Y = port(ffholdmux, \Y); + argQ = argD; + argD.replace(AB, Y); + argQ.replace(AB, port(ffholdmux, ffholdpol ? \B : \A)); + + dffholdmux = ffholdmux; + dffholdpol = ffholdpol; } - accept; +endcode + +match ffrstmux + if false /* TODO: ice40 resets are actually async */ + + select ffrstmux->type.in($mux) + // ffrstmux output must have two users: ffrstmux and ff.D + select nusers(port(ffrstmux, \Y)) == 2 + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + slice offset GetSize(port(ffrstmux, \Y)) + define <IdString> AB (BA == \B ? \A : \B) + index <SigBit> port(ffrstmux, AB)[offset] === argD[0] + + // Check that offset is consistent + filter !ffholdmux || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) + filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define <bool> pol (AB == \A) + set ffrstpol pol + + semioptional +endmatch + +code argD argQ + dffrstmux = ffrstmux; + if (ffrstmux) { + SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B); + SigSpec Y = port(ffrstmux, \Y); + argD.replace(AB, Y); + + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + } +endcode + +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \D)[offset] === argD[0] + + // Check that offset is consistent + filter (!ffholdmux && !ffrstmux) || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ff, \D)) >= offset + GetSize(argD) + filter port(ff, \D).extract(offset, GetSize(argD)) == argD + // Check that FF.Q is connected to CE-mux + filter !ffholdmux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + set ffoffset offset +endmatch + +code argQ + if (ff) { + if (clock != SigBit()) { + if (port(ff, \CLK) != clock) + reject; + if (param(ff, \CLK_POLARITY).as_bool() != clock_pol) + reject; + } + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffholdmux) { + argQ = argD; + argQ.replace(D, Q); + } + + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, State::Sx); + if (!init.is_fully_undef() && !init.is_fully_zero()) + reject; + } + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); + dffclock_pol = param(ff, \CLK_POLARITY).as_bool(); + } + // No enable/reset mux possible without flop + else if (dffholdmux || dffrstmux) + reject; endcode diff --git a/passes/pmgen/ice40_wrapcarry.pmg b/passes/pmgen/ice40_wrapcarry.pmg index 9e64c7467..bb59edb0c 100644 --- a/passes/pmgen/ice40_wrapcarry.pmg +++ b/passes/pmgen/ice40_wrapcarry.pmg @@ -9,3 +9,7 @@ match lut index <SigSpec> port(lut, \I1) === port(carry, \I0) index <SigSpec> port(lut, \I2) === port(carry, \I1) endmatch + +code + accept; +endcode diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index 72b02127a..2230145df 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -24,8 +24,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN bool did_something; +dict<SigBit, State> initbits; +pool<SigBit> rminitbits; #include "passes/pmgen/peepopt_pm.h" +#include "generate.h" struct PeepoptPass : public Pass { PeepoptPass() : Pass("peepopt", "collection of peephole optimizers") { } @@ -40,27 +43,86 @@ struct PeepoptPass : public Pass { } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { + std::string genmode; + log_header(design, "Executing PEEPOPT pass (run peephole optimizers).\n"); size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-singleton") { - // singleton_mode = true; - // continue; - // } + if (args[argidx] == "-generate" && argidx+1 < args.size()) { + genmode = args[++argidx]; + continue; + } break; } extra_args(args, argidx, design); - for (auto module : design->selected_modules()) { + if (!genmode.empty()) + { + initbits.clear(); + rminitbits.clear(); + + if (genmode == "shiftmul") + GENERATE_PATTERN(peepopt_pm, shiftmul); + else if (genmode == "muldiv") + GENERATE_PATTERN(peepopt_pm, muldiv); + else if (genmode == "dffmux") + GENERATE_PATTERN(peepopt_pm, dffmux); + else + log_abort(); + return; + } + + for (auto module : design->selected_modules()) + { did_something = true; - while (did_something) { + + while (did_something) + { did_something = false; - peepopt_pm pm(module, module->selected_cells()); + initbits.clear(); + rminitbits.clear(); + + peepopt_pm pm(module); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (sig[i].wire == nullptr) + continue; + if (val[i] != State::S0 && val[i] != State::S1) + continue; + initbits[sig[i]] = val[i]; + } + } + } + + pm.setup(module->selected_cells()); + pm.run_shiftmul(); pm.run_muldiv(); pm.run_dffmux(); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const &val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (rminitbits.count(sig[i])) + val[i] = State::Sx; + } + } + } + + initbits.clear(); + rminitbits.clear(); } } } diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index c88a52226..0069b0570 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -8,21 +8,23 @@ match dff select GetSize(port(dff, \D)) > 1 endmatch +code sigD + sigD = port(dff, \D); +endcode + match rstmux select rstmux->type == $mux select GetSize(port(rstmux, \Y)) > 1 - index <SigSpec> port(rstmux, \Y) === port(dff, \D) + index <SigSpec> port(rstmux, \Y) === sigD choice <IdString> BA {\B, \A} select port(rstmux, BA).is_fully_const() set rstmuxBA BA - optional + semioptional endmatch code sigD if (rstmux) sigD = port(rstmux, rstmuxBA == \B ? \A : \B); - else - sigD = port(dff, \D); endcode match cemux @@ -32,67 +34,111 @@ match cemux choice <IdString> AB {\A, \B} index <SigSpec> port(cemux, AB) === port(dff, \Q) set cemuxAB AB + semioptional endmatch code - SigSpec D = port(cemux, cemuxAB == \A ? \B : \A); - SigSpec Q = port(dff, \Q); + if (!cemux && !rstmux) + reject; +endcode + +code Const rst; - if (rstmux) + SigSpec D; + if (cemux) { + D = port(cemux, cemuxAB == \A ? \B : \A); + if (rstmux) + rst = port(rstmux, rstmuxBA).as_const(); + else + rst = Const(State::Sx, GetSize(D)); + } + else { + log_assert(rstmux); + D = port(rstmux, rstmuxBA == \B ? \A : \B); rst = port(rstmux, rstmuxBA).as_const(); + } + SigSpec Q = port(dff, \Q); int width = GetSize(D); - SigSpec &ceA = cemux->connections_.at(\A); - SigSpec &ceB = cemux->connections_.at(\B); - SigSpec &ceY = cemux->connections_.at(\Y); - SigSpec &dffD = dff->connections_.at(\D); - SigSpec &dffQ = dff->connections_.at(\Q); + SigSpec dffD = dff->getPort(\D); + SigSpec dffQ = dff->getPort(\Q); - if (D[width-1] == D[width-2]) { - did_something = true; + Const initval; + for (auto b : Q) { + auto it = initbits.find(b); + initval.bits.push_back(it == initbits.end() ? State::Sx : it->second); + } - SigBit sign = D[width-1]; - bool is_signed = sign.wire; - int i; - for (i = width-1; i >= 2; i--) { - if (!is_signed) { - module->connect(Q[i], sign); - if (D[i-1] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } - else { - module->connect(Q[i], Q[i-1]); - if (D[i-2] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } - } + auto cmpx = [=](State lhs, State rhs) { + if (lhs == State::Sx || rhs == State::Sx) + return true; + return lhs == rhs; + }; - ceA.remove(i, width-i); - ceB.remove(i, width-i); - ceY.remove(i, width-i); - cemux->fixup_parameters(); - dffD.remove(i, width-i); - dffQ.remove(i, width-i); + int i = width-1; + while (i > 1) { + if (D[i] != D[i-1]) + break; + if (!cmpx(rst[i], rst[i-1])) + break; + if (!cmpx(initval[i], initval[i-1])) + break; + if (!cmpx(rst[i], initval[i])) + break; + rminitbits.insert(Q[i]); + module->connect(Q[i], Q[i-1]); + i--; + } + if (i < width-1) { + did_something = true; + if (cemux) { + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); + ceA.remove(i, width-1-i); + ceB.remove(i, width-1-i); + ceY.remove(i, width-1-i); + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); + cemux->fixup_parameters(); + blacklist(cemux); + } + if (rstmux) { + SigSpec rstA = rstmux->getPort(\A); + SigSpec rstB = rstmux->getPort(\B); + SigSpec rstY = rstmux->getPort(\Y); + rstA.remove(i, width-1-i); + rstB.remove(i, width-1-i); + rstY.remove(i, width-1-i); + rstmux->setPort(\A, rstA); + rstmux->setPort(\B, rstB); + rstmux->setPort(\Y, rstY); + rstmux->fixup_parameters(); + blacklist(rstmux); + } + dffD.remove(i, width-1-i); + dffQ.remove(i, width-1-i); + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); + blacklist(dff); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux), width-i); - accept; + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i); + width = i+1; } - else { + if (cemux) { + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); + int count = 0; for (int i = width-1; i >= 0; i--) { if (D[i].wire) continue; - Wire *w = Q[i].wire; - auto it = w->attributes.find(\init); - State init; - if (it != w->attributes.end()) - init = it->second[Q[i].offset]; - else - init = State::Sx; - - if (init == State::Sx || init == D[i].data) { + if (cmpx(rst[i], D[i].data) && cmpx(initval[i], D[i].data)) { count++; + rminitbits.insert(Q[i]); module->connect(Q[i], D[i]); ceA.remove(i); ceB.remove(i); @@ -101,13 +147,25 @@ code dffQ.remove(i); } } - if (count > 0) { + if (count > 0) + { did_something = true; + + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); cemux->fixup_parameters(); + blacklist(cemux); + + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), count); + blacklist(dff); + + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count); } + } + if (did_something) accept; - } endcode diff --git a/passes/pmgen/pmgen.py b/passes/pmgen/pmgen.py index 573722d68..df0ffaff2 100644 --- a/passes/pmgen/pmgen.py +++ b/passes/pmgen/pmgen.py @@ -286,7 +286,7 @@ def process_pmgfile(f, filename): block["gencode"].append(rewrite_cpp(l.rstrip())) break - assert False + raise RuntimeError("'%s' statement not recognised on line %d" % (a[0], linenr)) if block["optional"]: assert not block["semioptional"] @@ -305,7 +305,8 @@ def process_pmgfile(f, filename): block["states"] = set() for s in line.split()[1:]: - assert s in state_types[current_pattern] + if s not in state_types[current_pattern]: + raise RuntimeError("'%s' not in state_types" % s) block["states"].add(s) codetype = "code" @@ -327,7 +328,7 @@ def process_pmgfile(f, filename): blocks.append(block) continue - assert False + raise RuntimeError("'%s' command not recognised" % cmd) for fn in pmgfiles: with open(fn, "r") as f: @@ -361,6 +362,7 @@ with open(outfile, "w") as f: print(" Module *module;", file=f) print(" SigMap sigmap;", file=f) print(" std::function<void()> on_accept;", file=f) + print(" bool setup_done;", file=f) print(" bool generate_mode;", file=f) print(" int accept_cnt;", file=f) print("", file=f) @@ -452,11 +454,19 @@ with open(outfile, "w") as f: print(" return sigmap(cell->getPort(portname));", file=f) print(" }", file=f) print("", file=f) + print(" SigSpec port(Cell *cell, IdString portname, const SigSpec& defval) {", file=f) + print(" return sigmap(cell->connections_.at(portname, defval));", file=f) + print(" }", file=f) + print("", file=f) print(" Const param(Cell *cell, IdString paramname) {", file=f) print(" return cell->getParam(paramname);", file=f) print(" }", file=f) print("", file=f) + print(" Const param(Cell *cell, IdString paramname, const Const& defval) {", file=f) + print(" return cell->parameters.at(paramname, defval);", file=f) + print(" }", file=f) + print("", file=f) print(" int nusers(const SigSpec &sig) {", file=f) print(" pool<Cell*> users;", file=f) @@ -468,7 +478,17 @@ with open(outfile, "w") as f: print("", file=f) print(" {}_pm(Module *module, const vector<Cell*> &cells) :".format(prefix), file=f) - print(" module(module), sigmap(module), generate_mode(false), rngseed(12345678) {", file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" setup(cells);", file=f) + print(" }", file=f) + print("", file=f) + + print(" {}_pm(Module *module) :".format(prefix), file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" }", file=f) + print("", file=f) + + print(" void setup(const vector<Cell*> &cells) {", file=f) for current_pattern in sorted(patterns.keys()): for s, t in sorted(udata_types[current_pattern].items()): if t.endswith("*"): @@ -476,6 +496,8 @@ with open(outfile, "w") as f: else: print(" ud_{}.{} = {}();".format(current_pattern, s, t), file=f) current_pattern = None + print(" log_assert(!setup_done);", file=f) + print(" setup_done = true;", file=f) print(" for (auto port : module->ports)", file=f) print(" add_siguser(module->wire(port), nullptr);", file=f) print(" for (auto cell : module->cells())", file=f) @@ -530,6 +552,7 @@ with open(outfile, "w") as f: for current_pattern in sorted(patterns.keys()): print(" int run_{}(std::function<void()> on_accept_f) {{".format(current_pattern), file=f) + print(" log_assert(setup_done);", file=f) print(" accept_cnt = 0;", file=f) print(" on_accept = on_accept_f;", file=f) print(" rollback = 0;", file=f) diff --git a/passes/pmgen/test_pmgen.cc b/passes/pmgen/test_pmgen.cc index 4f3eec935..72dc18dcc 100644 --- a/passes/pmgen/test_pmgen.cc +++ b/passes/pmgen/test_pmgen.cc @@ -23,13 +23,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -// for peepopt_pm -bool did_something; - #include "passes/pmgen/test_pmgen_pm.h" #include "passes/pmgen/ice40_dsp_pm.h" #include "passes/pmgen/xilinx_srl_pm.h" -#include "passes/pmgen/peepopt_pm.h" + +#include "generate.h" void reduce_chain(test_pmgen_pm &pm) { @@ -118,123 +116,6 @@ void opt_eqpmux(test_pmgen_pm &pm) log(" -> %s (%s)\n", log_id(c), log_id(c->type)); } -#define GENERATE_PATTERN(pmclass, pattern) \ - generate_pattern<pmclass>([](pmclass &pm, std::function<void()> f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) - -void pmtest_addports(Module *module) -{ - pool<SigBit> driven_bits, used_bits; - SigMap sigmap(module); - int icnt = 0, ocnt = 0; - - for (auto cell : module->cells()) - for (auto conn : cell->connections()) - { - if (cell->input(conn.first)) - for (auto bit : sigmap(conn.second)) - used_bits.insert(bit); - if (cell->output(conn.first)) - for (auto bit : sigmap(conn.second)) - driven_bits.insert(bit); - } - - for (auto wire : vector<Wire*>(module->wires())) - { - SigSpec ibits, obits; - for (auto bit : sigmap(wire)) { - if (!used_bits.count(bit)) - obits.append(bit); - if (!driven_bits.count(bit)) - ibits.append(bit); - } - if (!ibits.empty()) { - Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); - w->port_input = true; - module->connect(ibits, w); - } - if (!obits.empty()) { - Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); - w->port_output = true; - module->connect(w, obits); - } - } - - module->fixup_ports(); -} - -template <class pm> -void generate_pattern(std::function<void(pm&,std::function<void()>)> run, const char *pmclass, const char *pattern, Design *design) -{ - log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); - - int modcnt = 0; - int maxmodcnt = 100; - int maxsubcnt = 4; - int timeout = 0; - vector<Module*> mods; - - while (modcnt < maxmodcnt) - { - int submodcnt = 0, itercnt = 0, cellcnt = 0; - Module *mod = design->addModule(NEW_ID); - - while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) - { - if (timeout++ > 10000) - log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); - - pm matcher(mod, mod->cells()); - - matcher.rng(1); - matcher.rngseed += modcnt; - matcher.rng(1); - matcher.rngseed += submodcnt; - matcher.rng(1); - matcher.rngseed += itercnt; - matcher.rng(1); - matcher.rngseed += cellcnt; - matcher.rng(1); - - if (GetSize(mod->cells()) != cellcnt) - { - bool found_match = false; - run(matcher, [&](){ found_match = true; }); - cellcnt = GetSize(mod->cells()); - - if (found_match) { - Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", - pmclass, pattern, modcnt++)); - log("Creating module %s with %d cells.\n", log_id(m), cellcnt); - mod->cloneInto(m); - pmtest_addports(m); - mods.push_back(m); - submodcnt++; - timeout = 0; - } - } - - matcher.generate_mode = true; - run(matcher, [](){}); - } - - if (submodcnt && maxsubcnt < (1 << 16)) - maxsubcnt *= 2; - - design->remove(mod); - } - - Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); - log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); - for (auto mod : mods) { - Cell *c = m->addCell(mod->name, mod->name); - for (auto port : mod->ports) { - Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); - c->setPort(port, w); - } - } - pmtest_addports(m); -} - struct TestPmgenPass : public Pass { TestPmgenPass() : Pass("test_pmgen", "test pass for pmgen") { } void help() YS_OVERRIDE @@ -355,12 +236,6 @@ struct TestPmgenPass : public Pass { if (pattern == "xilinx_srl.variable") return GENERATE_PATTERN(xilinx_srl_pm, variable); - if (pattern == "peepopt-muldiv") - return GENERATE_PATTERN(peepopt_pm, muldiv); - - if (pattern == "peepopt-shiftmul") - return GENERATE_PATTERN(peepopt_pm, shiftmul); - log_cmd_error("Unknown pattern: %s\n", pattern.c_str()); } diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc new file mode 100644 index 000000000..054e123e4 --- /dev/null +++ b/passes/pmgen/xilinx_dsp.cc @@ -0,0 +1,646 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at> + * 2019 Eddie Hung <eddie@fpgeh.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" +#include <deque> + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#include "passes/pmgen/xilinx_dsp_pm.h" +#include "passes/pmgen/xilinx_dsp_CREG_pm.h" +#include "passes/pmgen/xilinx_dsp_cascade_pm.h" + +static Cell* addDsp(Module *module) { + Cell *cell = module->addCell(NEW_ID, ID(DSP48E1)); + cell->setParam(ID(ACASCREG), 0); + cell->setParam(ID(ADREG), 0); + cell->setParam(ID(A_INPUT), Const("DIRECT")); + cell->setParam(ID(ALUMODEREG), 0); + cell->setParam(ID(AREG), 0); + cell->setParam(ID(BCASCREG), 0); + cell->setParam(ID(B_INPUT), Const("DIRECT")); + cell->setParam(ID(BREG), 0); + cell->setParam(ID(CARRYINREG), 0); + cell->setParam(ID(CARRYINSELREG), 0); + cell->setParam(ID(CREG), 0); + cell->setParam(ID(DREG), 0); + cell->setParam(ID(INMODEREG), 0); + cell->setParam(ID(MREG), 0); + cell->setParam(ID(OPMODEREG), 0); + cell->setParam(ID(PREG), 0); + cell->setParam(ID(USE_MULT), Const("NONE")); + cell->setParam(ID(USE_SIMD), Const("ONE48")); + cell->setParam(ID(USE_DPORT), Const("FALSE")); + + cell->setPort(ID(D), Const(0, 25)); + cell->setPort(ID(INMODE), Const(0, 5)); + cell->setPort(ID(ALUMODE), Const(0, 4)); + cell->setPort(ID(OPMODE), Const(0, 7)); + cell->setPort(ID(CARRYINSEL), Const(0, 3)); + cell->setPort(ID(ACIN), Const(0, 30)); + cell->setPort(ID(BCIN), Const(0, 18)); + cell->setPort(ID(PCIN), Const(0, 48)); + cell->setPort(ID(CARRYIN), Const(0, 1)); + return cell; +} + +void xilinx_simd_pack(Module *module, const std::vector<Cell*> &selected_cells) +{ + std::deque<Cell*> simd12_add, simd12_sub; + std::deque<Cell*> simd24_add, simd24_sub; + + for (auto cell : selected_cells) { + if (!cell->type.in(ID($add), ID($sub))) + continue; + SigSpec Y = cell->getPort(ID(Y)); + if (!Y.is_chunk()) + continue; + if (!Y.as_chunk().wire->get_strpool_attribute(ID(use_dsp)).count("simd")) + continue; + if (GetSize(Y) > 25) + continue; + SigSpec A = cell->getPort(ID(A)); + SigSpec B = cell->getPort(ID(B)); + if (GetSize(Y) <= 13) { + if (GetSize(A) > 12) + continue; + if (GetSize(B) > 12) + continue; + if (cell->type == ID($add)) + simd12_add.push_back(cell); + else if (cell->type == ID($sub)) + simd12_sub.push_back(cell); + } + else if (GetSize(Y) <= 25) { + if (GetSize(A) > 24) + continue; + if (GetSize(B) > 24) + continue; + if (cell->type == ID($add)) + simd24_add.push_back(cell); + else if (cell->type == ID($sub)) + simd24_sub.push_back(cell); + } + else + log_abort(); + } + + auto f12 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort(ID(A)); + SigSpec B = lane->getPort(ID(B)); + SigSpec Y = lane->getPort(ID(Y)); + A.extend_u0(12, lane->getParam(ID(A_SIGNED)).as_bool()); + B.extend_u0(12, lane->getParam(ID(B_SIGNED)).as_bool()); + AB.append(A); + C.append(B); + if (GetSize(Y) < 13) + Y.append(module->addWire(NEW_ID, 13-GetSize(Y))); + else + log_assert(GetSize(Y) == 13); + P.append(Y.extract(0, 12)); + CARRYOUT.append(Y[12]); + }; + auto g12 = [&f12,module](std::deque<Cell*> &simd12) { + while (simd12.size() > 1) { + SigSpec AB, C, P, CARRYOUT; + + Cell *lane1 = simd12.front(); + simd12.pop_front(); + Cell *lane2 = simd12.front(); + simd12.pop_front(); + Cell *lane3 = nullptr; + Cell *lane4 = nullptr; + + if (!simd12.empty()) { + lane3 = simd12.front(); + simd12.pop_front(); + if (!simd12.empty()) { + lane4 = simd12.front(); + simd12.pop_front(); + } + } + + log("Analysing %s.%s for Xilinx DSP SIMD12 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam(ID(USE_SIMD), Const("FOUR12")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort(ID(OPMODE), Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f12(AB, C, P, CARRYOUT, lane1); + f12(AB, C, P, CARRYOUT, lane2); + if (lane3) { + f12(AB, C, P, CARRYOUT, lane3); + if (lane4) + f12(AB, C, P, CARRYOUT, lane4); + else { + AB.append(Const(0, 12)); + C.append(Const(0, 12)); + P.append(module->addWire(NEW_ID, 12)); + CARRYOUT.append(module->addWire(NEW_ID, 1)); + } + } + else { + AB.append(Const(0, 24)); + C.append(Const(0, 24)); + P.append(module->addWire(NEW_ID, 24)); + CARRYOUT.append(module->addWire(NEW_ID, 2)); + } + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort(ID(A), AB.extract(18, 30)); + cell->setPort(ID(B), AB.extract(0, 18)); + cell->setPort(ID(C), C); + cell->setPort(ID(P), P); + cell->setPort(ID(CARRYOUT), CARRYOUT); + if (lane1->type == ID($sub)) + cell->setPort(ID(ALUMODE), Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + if (lane3) module->remove(lane3); + if (lane4) module->remove(lane4); + + module->design->select(module, cell); + } + }; + g12(simd12_add); + g12(simd12_sub); + + auto f24 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) { + SigSpec A = lane->getPort(ID(A)); + SigSpec B = lane->getPort(ID(B)); + SigSpec Y = lane->getPort(ID(Y)); + A.extend_u0(24, lane->getParam(ID(A_SIGNED)).as_bool()); + B.extend_u0(24, lane->getParam(ID(B_SIGNED)).as_bool()); + C.append(A); + AB.append(B); + if (GetSize(Y) < 25) + Y.append(module->addWire(NEW_ID, 25-GetSize(Y))); + else + log_assert(GetSize(Y) == 25); + P.append(Y.extract(0, 24)); + CARRYOUT.append(module->addWire(NEW_ID)); // TWO24 uses every other bit + CARRYOUT.append(Y[24]); + }; + auto g24 = [&f24,module](std::deque<Cell*> &simd24) { + while (simd24.size() > 1) { + SigSpec AB; + SigSpec C; + SigSpec P; + SigSpec CARRYOUT; + + Cell *lane1 = simd24.front(); + simd24.pop_front(); + Cell *lane2 = simd24.front(); + simd24.pop_front(); + + log("Analysing %s.%s for Xilinx DSP SIMD24 packing.\n", log_id(module), log_id(lane1)); + + Cell *cell = addDsp(module); + cell->setParam(ID(USE_SIMD), Const("TWO24")); + // X = A:B + // Y = 0 + // Z = C + cell->setPort(ID(OPMODE), Const::from_string("0110011")); + + log_assert(lane1); + log_assert(lane2); + f24(AB, C, P, CARRYOUT, lane1); + f24(AB, C, P, CARRYOUT, lane2); + log_assert(GetSize(AB) == 48); + log_assert(GetSize(C) == 48); + log_assert(GetSize(P) == 48); + log_assert(GetSize(CARRYOUT) == 4); + cell->setPort(ID(A), AB.extract(18, 30)); + cell->setPort(ID(B), AB.extract(0, 18)); + cell->setPort(ID(C), C); + cell->setPort(ID(P), P); + cell->setPort(ID(CARRYOUT), CARRYOUT); + if (lane1->type == ID($sub)) + cell->setPort(ID(ALUMODE), Const::from_string("0011")); + + module->remove(lane1); + module->remove(lane2); + + module->design->select(module, cell); + } + }; + g24(simd24_add); + g24(simd24_sub); +} + +void xilinx_dsp_pack(xilinx_dsp_pm &pm) +{ + auto &st = pm.st_xilinx_dsp_pack; + + log("Analysing %s.%s for Xilinx DSP packing.\n", log_id(pm.module), log_id(st.dsp)); + + log_debug("preAdd: %s\n", log_id(st.preAdd, "--")); + log_debug("ffAD: %s %s %s\n", log_id(st.ffAD, "--"), log_id(st.ffADcemux, "--"), log_id(st.ffADrstmux, "--")); + log_debug("ffA2: %s %s %s\n", log_id(st.ffA2, "--"), log_id(st.ffA2cemux, "--"), log_id(st.ffA2rstmux, "--")); + log_debug("ffA1: %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--")); + log_debug("ffB2: %s %s %s\n", log_id(st.ffB2, "--"), log_id(st.ffB2cemux, "--"), log_id(st.ffB2rstmux, "--")); + log_debug("ffB1: %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--")); + log_debug("ffD: %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--")); + log_debug("dsp: %s\n", log_id(st.dsp, "--")); + log_debug("ffM: %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--")); + log_debug("postAdd: %s\n", log_id(st.postAdd, "--")); + log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--")); + log_debug("ffP: %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--")); + log_debug("overflow: %s\n", log_id(st.overflow, "--")); + + Cell *cell = st.dsp; + + if (st.preAdd) { + log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type)); + bool A_SIGNED = st.preAdd->getParam(ID(A_SIGNED)).as_bool(); + bool D_SIGNED = st.preAdd->getParam(ID(B_SIGNED)).as_bool(); + if (st.sigA == st.preAdd->getPort(ID(B))) + std::swap(A_SIGNED, D_SIGNED); + st.sigA.extend_u0(30, A_SIGNED); + st.sigD.extend_u0(25, D_SIGNED); + cell->setPort(ID(A), st.sigA); + cell->setPort(ID(D), st.sigD); + cell->setPort(ID(INMODE), Const::from_string("00100")); + + if (st.ffAD) { + if (st.ffADcemux) { + SigSpec S = st.ffADcemux->getPort(ID(S)); + cell->setPort(ID(CEAD), st.ffADcepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort(ID(CEAD), State::S1); + cell->setParam(ID(ADREG), 1); + } + + cell->setParam(ID(USE_DPORT), Const("TRUE")); + + pm.autoremove(st.preAdd); + } + if (st.postAdd) { + log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type)); + + SigSpec &opmode = cell->connections_.at(ID(OPMODE)); + if (st.postAddMux) { + log_assert(st.ffP); + opmode[4] = st.postAddMux->getPort(ID(S)); + pm.autoremove(st.postAddMux); + } + else if (st.ffP && st.sigC == st.sigP) + opmode[4] = State::S0; + else + opmode[4] = State::S1; + opmode[6] = State::S0; + opmode[5] = State::S1; + + if (opmode[4] != State::S0) { + if (st.postAddMuxAB == ID(A)) + st.sigC.extend_u0(48, st.postAdd->getParam(ID(B_SIGNED)).as_bool()); + else + st.sigC.extend_u0(48, st.postAdd->getParam(ID(A_SIGNED)).as_bool()); + cell->setPort(ID(C), st.sigC); + } + + pm.autoremove(st.postAdd); + } + if (st.overflow) { + log(" overflow %s (%s)\n", log_id(st.overflow), log_id(st.overflow->type)); + cell->setParam(ID(USE_PATTERN_DETECT), Const("PATDET")); + cell->setParam(ID(SEL_PATTERN), Const("PATTERN")); + cell->setParam(ID(SEL_MASK), Const("MASK")); + + if (st.overflow->type == ID($ge)) { + Const B = st.overflow->getPort(ID(B)).as_const(); + log_assert(std::count(B.bits.begin(), B.bits.end(), State::S1) == 1); + // Since B is an exact power of 2, subtract 1 + // by inverting all bits up until hitting + // that one hi bit + for (auto &b : B.bits) + if (b == State::S0) b = State::S1; + else if (b == State::S1) { + b = State::S0; + break; + } + B.extu(48); + + cell->setParam(ID(MASK), B); + cell->setParam(ID(PATTERN), Const(0, 48)); + cell->setPort(ID(OVERFLOW), st.overflow->getPort(ID(Y))); + } + else log_abort(); + + pm.autoremove(st.overflow); + } + + if (st.clock != SigBit()) + { + cell->setPort(ID(CLK), st.clock); + + auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { + SigSpec D = ff->getPort(ID(D)); + SigSpec Q = pm.sigmap(ff->getPort(ID(Q))); + if (!A.empty()) + A.replace(Q, D); + if (rstmux) { + SigSpec Y = rstmux->getPort(ID(Y)); + SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B)); + if (!A.empty()) + A.replace(Y, AB); + if (rstport != IdString()) { + SigSpec S = rstmux->getPort(ID(S)); + cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); + } + } + else if (rstport != IdString()) + cell->setPort(rstport, State::S0); + if (cemux) { + SigSpec Y = cemux->getPort(ID(Y)); + SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A)); + SigSpec S = cemux->getPort(ID(S)); + if (!A.empty()) + A.replace(Y, BA); + cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort(ceport, State::S1); + + for (auto c : Q.chunks()) { + auto it = c.wire->attributes.find(ID(init)); + if (it == c.wire->attributes.end()) + continue; + for (int i = c.offset; i < c.offset+c.width; i++) { + log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); + it->second[i] = State::Sx; + } + } + }; + + if (st.ffA2) { + SigSpec A = cell->getPort(ID(A)); + f(A, st.ffA2, st.ffA2cemux, st.ffA2cepol, ID(CEA2), st.ffA2rstmux, st.ffArstpol, ID(RSTA)); + if (st.ffA1) { + f(A, st.ffA1, st.ffA1cemux, st.ffA1cepol, ID(CEA1), st.ffA1rstmux, st.ffArstpol, IdString()); + cell->setParam(ID(AREG), 2); + cell->setParam(ID(ACASCREG), 2); + } + else { + cell->setParam(ID(AREG), 1); + cell->setParam(ID(ACASCREG), 1); + } + pm.add_siguser(A, cell); + cell->setPort(ID(A), A); + } + if (st.ffB2) { + SigSpec B = cell->getPort(ID(B)); + f(B, st.ffB2, st.ffB2cemux, st.ffB2cepol, ID(CEB2), st.ffB2rstmux, st.ffBrstpol, ID(RSTB)); + if (st.ffB1) { + f(B, st.ffB1, st.ffB1cemux, st.ffB1cepol, ID(CEB1), st.ffB1rstmux, st.ffBrstpol, IdString()); + cell->setParam(ID(BREG), 2); + cell->setParam(ID(BCASCREG), 2); + } + else { + cell->setParam(ID(BREG), 1); + cell->setParam(ID(BCASCREG), 1); + } + pm.add_siguser(B, cell); + cell->setPort(ID(B), B); + } + if (st.ffD) { + SigSpec D = cell->getPort(ID(D)); + f(D, st.ffD, st.ffDcemux, st.ffDcepol, ID(CED), st.ffDrstmux, st.ffDrstpol, ID(RSTD)); + pm.add_siguser(D, cell); + cell->setPort(ID(D), D); + cell->setParam(ID(DREG), 1); + } + if (st.ffM) { + SigSpec M; // unused + f(M, st.ffM, st.ffMcemux, st.ffMcepol, ID(CEM), st.ffMrstmux, st.ffMrstpol, ID(RSTM)); + st.ffM->connections_.at(ID(Q)).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM))); + cell->setParam(ID(MREG), State::S1); + } + if (st.ffP) { + SigSpec P; // unused + f(P, st.ffP, st.ffPcemux, st.ffPcepol, ID(CEP), st.ffPrstmux, st.ffPrstpol, ID(RSTP)); + st.ffP->connections_.at(ID(Q)).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP))); + cell->setParam(ID(PREG), State::S1); + } + + log(" clock: %s (%s)", log_signal(st.clock), "posedge"); + + if (st.ffA2) { + log(" ffA2:%s", log_id(st.ffA2)); + if (st.ffA1) + log(" ffA1:%s", log_id(st.ffA1)); + } + + if (st.ffAD) + log(" ffAD:%s", log_id(st.ffAD)); + + if (st.ffB2) { + log(" ffB2:%s", log_id(st.ffB2)); + if (st.ffB1) + log(" ffB1:%s", log_id(st.ffB1)); + } + + if (st.ffD) + log(" ffD:%s", log_id(st.ffD)); + + if (st.ffM) + log(" ffM:%s", log_id(st.ffM)); + + if (st.ffP) + log(" ffP:%s", log_id(st.ffP)); + } + log("\n"); + + SigSpec P = st.sigP; + if (GetSize(P) < 48) + P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); + cell->setPort(ID(P), P); + + pm.blacklist(cell); +} + +void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm) +{ + auto &st = pm.st_xilinx_dsp_packC; + + log_debug("Analysing %s.%s for Xilinx DSP packing (CREG).\n", log_id(pm.module), log_id(st.dsp)); + log_debug("ffC: %s %s %s\n", log_id(st.ffC, "--"), log_id(st.ffCcemux, "--"), log_id(st.ffCrstmux, "--")); + + Cell *cell = st.dsp; + + if (st.clock != SigBit()) + { + cell->setPort(ID(CLK), st.clock); + + auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { + SigSpec D = ff->getPort(ID(D)); + SigSpec Q = pm.sigmap(ff->getPort(ID(Q))); + if (!A.empty()) + A.replace(Q, D); + if (rstmux) { + SigSpec Y = rstmux->getPort(ID(Y)); + SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B)); + if (!A.empty()) + A.replace(Y, AB); + if (rstport != IdString()) { + SigSpec S = rstmux->getPort(ID(S)); + cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); + } + } + else if (rstport != IdString()) + cell->setPort(rstport, State::S0); + if (cemux) { + SigSpec Y = cemux->getPort(ID(Y)); + SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A)); + SigSpec S = cemux->getPort(ID(S)); + if (!A.empty()) + A.replace(Y, BA); + cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S)); + } + else + cell->setPort(ceport, State::S1); + + for (auto c : Q.chunks()) { + auto it = c.wire->attributes.find(ID(init)); + if (it == c.wire->attributes.end()) + continue; + for (int i = c.offset; i < c.offset+c.width; i++) { + log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); + it->second[i] = State::Sx; + } + } + }; + + if (st.ffC) { + SigSpec C = cell->getPort(ID(C)); + f(C, st.ffC, st.ffCcemux, st.ffCcepol, ID(CEC), st.ffCrstmux, st.ffCrstpol, ID(RSTC)); + pm.add_siguser(C, cell); + cell->setPort(ID(C), C); + cell->setParam(ID(CREG), 1); + } + + log(" clock: %s (%s)", log_signal(st.clock), "posedge"); + + if (st.ffC) + log(" ffC:%s", log_id(st.ffC)); + log("\n"); + } + + pm.blacklist(cell); +} + +struct XilinxDspPass : public Pass { + XilinxDspPass() : Pass("xilinx_dsp", "Xilinx: pack resources into DSPs") { } + void help() YS_OVERRIDE + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" xilinx_dsp [options] [selection]\n"); + log("\n"); + log("Pack input registers (A2, A1, B2, B1, C, D, AD; with optional enable/reset),\n"); + log("pipeline registers (M; with optional enable/reset), output registers (P; with\n"); + log("optional enable/reset), pre-adder and/or post-adder into Xilinx DSP resources.\n"); + log("\n"); + log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n"); + log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n"); + log("used to override the current accumulation result with a new value, which will\n"); + log("be added to the multiplier result to form the next accumulation result.\n"); + log("\n"); + log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n"); + log("connections (optionally, where 'P' is right-shifted by 17-bits and used as an\n"); + log("input to the post-adder -- a pattern common for summing partial products to\n"); + log("implement wide multipliers). Limited support also exists for similar cascading\n"); + log("for A and B using '[AB]COUT' -> '[AB]CIN'. Currently, cascade chains are limited\n"); + log("to a maximum length of 20 cells, corresponding to the smallest Xilinx 7 Series\n"); + log("device.\n"); + log("\n"); + log("\n"); + log("Experimental feature: addition/subtractions less than 12 or 24 bits with the\n"); + log("'(* use_dsp=\"simd\" *)' attribute attached to the output wire or attached to\n"); + log("the add/subtract operator will cause those operations to be implemented using\n"); + log("the 'SIMD' feature of DSPs.\n"); + log("\n"); + log("Experimental feature: the presence of a `$ge' cell attached to the registered\n"); + log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n"); + log("into using the DSP48E1's pattern detector feature for overflow detection.\n"); + log("\n"); + } + void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE + { + log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) + { + // if (args[argidx] == "-singleton") { + // singleton_mode = true; + // continue; + // } + break; + } + extra_args(args, argidx, design); + + for (auto module : design->selected_modules()) { + // Experimental feature: pack $add/$sub cells with + // (* use_dsp48="simd" *) into DSP48E1's using its + // SIMD feature + xilinx_simd_pack(module, module->selected_cells()); + + // Match for all features ([ABDMP][12]?REG, pre-adder, + // post-adder, pattern detector, etc.) except for CREG + { + xilinx_dsp_pm pm(module, module->selected_cells()); + pm.run_xilinx_dsp_pack(xilinx_dsp_pack); + } + // Separating out CREG packing is necessary since there + // is no guarantee that the cell ordering corresponds + // to the "expected" case (i.e. the order in which + // they appear in the source) thus the possiblity + // existed that a register got packed as a CREG into a + // downstream DSP that should have otherwise been a + // PREG of an upstream DSP that had not been visited + // yet + { + xilinx_dsp_CREG_pm pm(module, module->selected_cells()); + pm.run_xilinx_dsp_packC(xilinx_dsp_packC); + } + // Lastly, identify and utilise PCOUT -> PCIN, + // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade + // chains + { + xilinx_dsp_cascade_pm pm(module, module->selected_cells()); + pm.run_xilinx_dsp_cascade(); + } + } + } +} XilinxDspPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg new file mode 100644 index 000000000..0ba529011 --- /dev/null +++ b/passes/pmgen/xilinx_dsp.pmg @@ -0,0 +1,727 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// ( 1) Starting from a DSP48E1 cell +// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If ADREG matched, treat 'A' input as input of ADREG +// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// If A2REG, then match 'A' input for A1REG +// ( 5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached +// to the 'P' output where it is compared to a constant that is a +// power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function +// Notes: +// - The intention of this pattern matcher is for it to be compatible with +// DSP48E1 cells inferred from multiply operations by Yosys, as well as for +// user instantiations that may already contain the cells being packed... +// (though the latter is currently untested) +// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used +// for each *REG match, it has been factored out into two subpatterns: +// in_dffe and out_dffe located at the bottom of this file. +// - Matching for pattern detector features is currently incomplete. For +// example, matching for underflow as well as overflow detection is +// possible, as would auto-reset, enabling saturated arithmetic, detecting +// custom patterns, etc. + +pattern xilinx_dsp_pack + +state <SigBit> clock +state <SigSpec> sigA sigB sigC sigD sigM sigP +state <IdString> postAddAB postAddMuxAB +state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol +state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol +state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux +state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux +state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux + +// Variables used for subpatterns +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +// (1) Starting from a DSP48E1 cell +match dsp + select dsp->type.in(\DSP48E1) +endmatch + +code sigA sigB sigC sigD sigM clock + auto unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigA = unextend(port(dsp, \A)); + sigB = unextend(port(dsp, \B)); + + sigC = port(dsp, \C, SigSpec()); + sigD = port(dsp, \D, SigSpec()); + + SigSpec P = port(dsp, \P); + if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { + // Only care about those bits that are used + int i; + for (i = GetSize(P)-1; i >= 0; i--) + if (nusers(P[i]) > 1) + break; + i++; + log_assert(nusers(P.extract_end(i)) <= 1); + // This sigM could have no users if downstream sinks (e.g. $add) is + // narrower than $mul result, for example + if (i == 0) + reject; + sigM = P.extract(0, i); + } + else + sigM = P; + + clock = port(dsp, \CLK, SigBit()); +endcode + +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'A' input as input of ADREG +code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock + if (param(dsp, \ADREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffAD = dff; + clock = dffclock; + if (dffrstmux) { + ffADrstmux = dffrstmux; + ffADrstpol = dffrstpol; + } + if (dffcemux) { + ffADcemux = dffcemux; + ffADcepol = dffcepol; + } + sigA = dffD; + } + } +endcode + +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) +match preAdd + if sigD.empty() || sigD.is_fully_zero() + // Ensure that preAdder not already used + if param(dsp, \USE_DPORT, Const("FALSE")).decode_string() == "FALSE" + if port(dsp, \INMODE, Const(0, 5)).is_fully_zero() + + select preAdd->type.in($add) + // Output has to be 25 bits or less + select GetSize(port(preAdd, \Y)) <= 25 + select nusers(port(preAdd, \Y)) == 2 + choice <IdString> AB {\A, \B} + // A port has to be 30 bits or less + select GetSize(port(preAdd, AB)) <= 30 + define <IdString> BA (AB == \A ? \B : \A) + // D port has to be 25 bits or less + select GetSize(port(preAdd, BA)) <= 25 + index <SigSpec> port(preAdd, \Y) === sigA + + optional +endmatch + +code sigA sigD + if (preAdd) { + sigA = port(preAdd, \A); + sigD = port(preAdd, \B); + } +endcode + +// (4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// Then match 'A' input for A1REG +code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol + // Only search for ffA2 if there was a pre-adder + // (otherwise ffA2 would have been matched as ffAD) + if (preAdd) { + if (param(dsp, \AREG).as_int() == 0) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + ffA2 = dff; + clock = dffclock; + if (dffrstmux) { + ffA2rstmux = dffrstmux; + ffArstpol = dffrstpol; + } + if (dffcemux) { + ffA2cepol = dffcepol; + ffA2cemux = dffcemux; + } + sigA = dffD; + } + } + } + // And if there wasn't a pre-adder, + // move AD register to A + else if (ffAD) { + log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux); + std::swap(ffA2, ffAD); + std::swap(ffA2cemux, ffADcemux); + std::swap(ffA2rstmux, ffADrstmux); + ffA2cepol = ffADcepol; + ffArstpol = ffADrstpol; + } + + // Now attempt to match A1 + if (ffA2) { + argQ = sigA; + subpattern(in_dffe); + if (dff) { + if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffA1_end; + if (dffrstmux) { + if (ffArstpol != dffrstpol) + goto ffA1_end; + if (port(ffA2rstmux, \S) != port(dffrstmux, \S)) + goto ffA1_end; + ffA1rstmux = dffrstmux; + } + + ffA1 = dff; + clock = dffclock; + + if (dffcemux) { + ffA1cemux = dffcemux; + ffA1cepol = dffcepol; + } + sigA = dffD; + +ffA1_end: ; + } + } +endcode + +// (5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol + if (param(dsp, \BREG).as_int() == 0) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + ffB2 = dff; + clock = dffclock; + if (dffrstmux) { + ffB2rstmux = dffrstmux; + ffBrstpol = dffrstpol; + } + if (dffcemux) { + ffB2cemux = dffcemux; + ffB2cepol = dffcepol; + } + sigB = dffD; + + // Now attempt to match B1 + if (ffB2) { + argQ = sigB; + subpattern(in_dffe); + if (dff) { + if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr)) + goto ffB1_end; + if (dffrstmux) { + if (ffBrstpol != dffrstpol) + goto ffB1_end; + if (port(ffB2rstmux, \S) != port(dffrstmux, \S)) + goto ffB1_end; + ffB1rstmux = dffrstmux; + } + + ffB1 = dff; + clock = dffclock; + + if (dffcemux) { + ffB1cemux = dffcemux; + ffB1cepol = dffcepol; + } + sigB = dffD; + +ffB1_end: ; + } + } + + } + } +endcode + +// (6) Match 'D' input for DREG +code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock + if (param(dsp, \DREG).as_int() == 0) { + argQ = sigD; + subpattern(in_dffe); + if (dff) { + ffD = dff; + clock = dffclock; + if (dffrstmux) { + ffDrstmux = dffrstmux; + ffDrstpol = dffrstpol; + } + if (dffcemux) { + ffDcemux = dffcemux; + ffDcepol = dffcepol; + } + sigD = dffD; + } + } +endcode + +// (7) Match 'P' output that exclusively drives an MREG +code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock + if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { + argD = sigM; + subpattern(out_dffe); + if (dff) { + ffM = dff; + clock = dffclock; + if (dffrstmux) { + ffMrstmux = dffrstmux; + ffMrstpol = dffrstpol; + } + if (dffcemux) { + ffMcemux = dffcemux; + ffMcepol = dffcepol; + } + sigM = dffQ; + } + } + sigP = sigM; +endcode + +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +match postAdd + // Ensure that Z mux is not already used + if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() + + select postAdd->type.in($add) + select GetSize(port(postAdd, \Y)) <= 48 + choice <IdString> AB {\A, \B} + select nusers(port(postAdd, AB)) <= 3 + filter ffMcemux || nusers(port(postAdd, AB)) == 2 + filter !ffMcemux || nusers(port(postAdd, AB)) == 3 + + index <SigBit> port(postAdd, AB)[0] === sigP[0] + filter GetSize(port(postAdd, AB)) >= GetSize(sigP) + filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP + // Check that remainder of AB is a sign-extension + define <bool> AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool()) + filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) + set postAddAB AB + optional +endmatch + +code sigC sigP + if (postAdd) { + sigC = port(postAdd, postAddAB == \A ? \B : \A); + sigP = port(postAdd, \Y); + } +endcode + +// (9) Match 'P' output that exclusively drives a PREG +code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock + if (param(dsp, \PREG).as_int() == 0) { + int users = 2; + // If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux + if (ffMcemux && !postAdd) users++; + if (nusers(sigP) == users) { + argD = sigP; + subpattern(out_dffe); + if (dff) { + ffP = dff; + clock = dffclock; + if (dffrstmux) { + ffPrstmux = dffrstmux; + ffPrstpol = dffrstpol; + } + if (dffcemux) { + ffPcemux = dffcemux; + ffPcepol = dffcepol; + } + sigP = dffQ; + } + } + } +endcode + +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +match postAddMux + if postAdd + if ffP + select postAddMux->type.in($mux) + select nusers(port(postAddMux, \Y)) == 2 + choice <IdString> AB {\A, \B} + index <SigSpec> port(postAddMux, AB) === sigP + index <SigSpec> port(postAddMux, \Y) === sigC + set postAddMuxAB AB + optional +endmatch + +code sigC + if (postAddMux) + sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); +endcode + +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +// the 'P' output where it is compared to a constant that is a power-of-2: +// e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function +match overflow + if ffP + if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" + select overflow->type.in($ge) + select GetSize(port(overflow, \Y)) <= 48 + select port(overflow, \B).is_fully_const() + define <Const> B port(overflow, \B).as_const() + select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1 + index <SigSpec> port(overflow, \A) === sigP + optional +endmatch + +code + accept; +endcode + +// ####################### + +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +subpattern in_dffe +arg argD argQ clock + +code + dff = nullptr; + if (GetSize(argQ) == 0) + reject; + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant + if (!c.wire) + reject; + // Abandon matches when 'Q' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } +endcode + +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \Q)[offset] === argQ[0] + + // Check that the rest of argQ is present + filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ argD + SigSpec Q = port(ff, \Q); + dff = ff; + dffclock = port(ff, \CLK); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); +endcode + +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero +match ffrstmux + if !argD.empty() + select ffrstmux->type.in($mux) + index <SigSpec> port(ffrstmux, \Y) === argD + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <bool> pol (BA == \B) + set ffrstpol pol + semioptional +endmatch + +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffcemux if argQ has at + // least 3 users (ff, <upstream>, ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; +endcode + +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +match ffcemux + if !argD.empty() + select ffcemux->type.in($mux) + index <SigSpec> port(ffcemux, \Y) === argD + choice <IdString> AB {\A, \B} + index <SigSpec> port(ffcemux, AB) === argQ + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, ffcepol ? \B : \A); + dffD.replace(port(ffcemux, \Y), argD); + } + else + dffcemux = nullptr; +endcode + +// ####################### + +// Subpattern for matching against output registers, based on knowledge of the +// 'D' input. +// At a high level: +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) +subpattern out_dffe +arg argD argQ clock + +code + dff = nullptr; + for (auto c : argD.chunks()) + // Abandon matches when 'D' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; +endcode + +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +match ffcemux + select ffcemux->type.in($mux) + // ffcemux output must have two users: ffcemux and ff.D + select nusers(port(ffcemux, \Y)) == 2 + + choice <IdString> AB {\A, \B} + // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) + select nusers(port(ffcemux, AB)) >= 3 + + slice offset GetSize(port(ffcemux, \Y)) + define <IdString> BA (AB == \A ? \B : \A) + index <SigBit> port(ffcemux, BA)[offset] === argD[0] + + // Check that the rest of argD is present + filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD) + filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define <bool> pol (AB == \A) + set ffcepol pol + + semioptional +endmatch + +code argD argQ + dffcemux = ffcemux; + if (ffcemux) { + SigSpec BA = port(ffcemux, ffcepol ? \B : \A); + SigSpec Y = port(ffcemux, \Y); + argQ = argD; + argD.replace(BA, Y); + argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B)); + + dffcemux = ffcemux; + dffcepol = ffcepol; + } +endcode + +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +match ffrstmux + select ffrstmux->type.in($mux) + // ffrstmux output must have two users: ffrstmux and ff.D + select nusers(port(ffrstmux, \Y)) == 2 + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + slice offset GetSize(port(ffrstmux, \Y)) + define <IdString> AB (BA == \B ? \A : \B) + index <SigBit> port(ffrstmux, AB)[offset] === argD[0] + + // Check that offset is consistent + filter !ffcemux || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) + filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD + + set ffoffset offset + define <bool> pol (AB == \A) + set ffrstpol pol + + semioptional +endmatch + +code argD argQ + dffrstmux = ffrstmux; + if (ffrstmux) { + SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B); + SigSpec Y = port(ffrstmux, \Y); + argD.replace(AB, Y); + + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + } +endcode + +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \D)[offset] === argD[0] + + // Check that offset is consistent + filter (!ffcemux && !ffrstmux) || ffoffset == offset + // Check that the rest of argD is present + filter GetSize(port(ff, \D)) >= offset + GetSize(argD) + filter port(ff, \D).extract(offset, GetSize(argD)) == argD + // Check that FF.Q is connected to CE-mux + filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffcemux) { + argQ = argD; + argQ.replace(D, Q); + } + + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); +endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg new file mode 100644 index 000000000..5cd34162e --- /dev/null +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -0,0 +1,234 @@ +// This file describes the second of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// Notes: +// - Running CREG packing after xilinx_dsp_pack is necessary since there is no +// guarantee that the cell ordering corresponds to the "expected" case (i.e. +// the order in which they appear in the source) thus the possiblity existed +// that a register got packed as a CREG into a downstream DSP that should +// have otherwise been a PREG of an upstream DSP that had not been visited +// yet +// - The reason this is separated out from the xilinx_dsp.pmg file is +// for efficiency --- each *.pmg file creates a class of the same basename, +// which when constructed, creates a custom database tailored to the +// pattern(s) contained within. Since the pattern in this file must be +// executed after the pattern contained in xilinx_dsp.pmg, it is necessary +// to reconstruct this database. Separating the two patterns into +// independent files causes two smaller, more specific, databases. + +pattern xilinx_dsp_packC + +udata <std::function<SigSpec(const SigSpec&)>> unextend +state <SigBit> clock +state <SigSpec> sigC sigP +state <bool> ffCcepol ffCrstpol +state <Cell*> ffC ffCcemux ffCrstmux + +// Variables used for subpatterns +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +match dsp + select dsp->type.in(\DSP48E1) + select param(dsp, \CREG, 1).as_int() == 0 + select nusers(port(dsp, \C, SigSpec())) > 1 +endmatch + +code sigC sigP clock + unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + sigC = unextend(port(dsp, \C, SigSpec())); + + SigSpec P = port(dsp, \P); + if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { + // Only care about those bits that are used + int i; + for (i = GetSize(P)-1; i >= 0; i--) + if (nusers(P[i]) > 1) + break; + i++; + log_assert(nusers(P.extract_end(i)) <= 1); + sigP = P.extract(0, i); + } + else + sigP = P; + + clock = port(dsp, \CLK, SigBit()); +endcode + +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using the in_dffe subpattern) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + argQ = sigC; + subpattern(in_dffe); + if (dff) { + ffC = dff; + clock = dffclock; + if (dffrstmux) { + ffCrstmux = dffrstmux; + ffCrstpol = dffrstpol; + } + if (dffcemux) { + ffCcemux = dffcemux; + ffCcepol = dffcepol; + } + sigC = dffD; + } +endcode + +code + if (ffC) + accept; +endcode + +// ####################### + +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +subpattern in_dffe +arg argD argQ clock + +code + dff = nullptr; + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant + if (!c.wire) + reject; + // Abandon matches when 'Q' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } +endcode + +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \Q)[offset] === argQ[0] + + // Check that the rest of argQ is present + filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ argD + SigSpec Q = port(ff, \Q); + dff = ff; + dffclock = port(ff, \CLK); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); +endcode + +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero +match ffrstmux + if !argD.empty() + select ffrstmux->type.in($mux) + index <SigSpec> port(ffrstmux, \Y) === argD + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <bool> pol (BA == \B) + set ffrstpol pol + semioptional +endmatch + +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffcemux if argQ has at + // least 3 users (ff, <upstream>, ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; +endcode + +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +match ffcemux + if !argD.empty() + select ffcemux->type.in($mux) + index <SigSpec> port(ffcemux, \Y) === argD + choice <IdString> AB {\A, \B} + index <SigSpec> port(ffcemux, AB) === argQ + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, ffcepol ? \B : \A); + dffD.replace(port(ffcemux, \Y), argD); + } + else + dffcemux = nullptr; +endcode diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg new file mode 100644 index 000000000..7a32df2b7 --- /dev/null +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -0,0 +1,427 @@ +// This file describes the third of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP +// (4) Same as (3) but for BCOUT -> BCIN cascade +// (5) Recursively go to (2.1) until no more matches possible, keeping track +// of the longest possible chain found +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate +// Notes: +// - Currently, [AB]COUT -> [AB]COUT cascades (3 or 4) are only considered +// if a PCOUT -> PCIN cascade is (2.1 or 2.2) first identified; this need +// not be the case --- [AB] cascades can exist independently of a P cascade +// (though all three cascades must come from the same DSP). This situation +// is not handled currently. +// - In addition, [AB]COUT -> [AB]COUT cascades (3 or 4) are currently +// conservative in that they examine the situation where (a) the previous +// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no +// registers enabled, and (c) that there exists only one additional register +// between the upstream and downstream DSPs. This can certainly be relaxed +// to identify situations ranging from (i) neither DSP uses any registers, +// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and +// there exists a further 2 registers between them. This remains a TODO +// item. + +pattern xilinx_dsp_cascade + +udata <std::function<SigSpec(const SigSpec&)>> unextend +udata <vector<std::tuple<Cell*,int,int,int>>> chain longest_chain +state <Cell*> next +state <SigSpec> clock +state <int> AREG BREG + +// Variables used for subpatterns +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +code +#define MAX_DSP_CASCADE 20 +endcode + +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port +match first + select first->type.in(\DSP48E1) + select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") + select nusers(port(first, \PCOUT, SigSpec())) <= 1 +endmatch + +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate +code + longest_chain.clear(); + chain.emplace_back(first, -1, -1, -1); + subpattern(tail); +finally + chain.pop_back(); + log_assert(chain.empty()); + if (GetSize(longest_chain) > 1) { + Cell *dsp = std::get<0>(longest_chain.front()); + + Cell *dsp_pcin; + int P, AREG, BREG; + for (int i = 1; i < GetSize(longest_chain); i++) { + std::tie(dsp_pcin,P,AREG,BREG) = longest_chain[i]; + + if (i % MAX_DSP_CASCADE > 0) { + if (P >= 0) { + Wire *cascade = module->addWire(NEW_ID, 48); + dsp_pcin->setPort(ID(C), Const(0, 48)); + dsp_pcin->setPort(ID(PCIN), cascade); + dsp->setPort(ID(PCOUT), cascade); + add_siguser(cascade, dsp_pcin); + add_siguser(cascade, dsp); + + SigSpec opmode = port(dsp_pcin, \OPMODE, Const(0, 7)); + if (P == 17) + opmode[6] = State::S1; + else if (P == 0) + opmode[6] = State::S0; + else log_abort(); + + opmode[5] = State::S0; + opmode[4] = State::S1; + dsp_pcin->setPort(\OPMODE, opmode); + + log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); + } + if (AREG >= 0) { + Wire *cascade = module->addWire(NEW_ID, 30); + dsp_pcin->setPort(ID(A), Const(0, 30)); + dsp_pcin->setPort(ID(ACIN), cascade); + dsp->setPort(ID(ACOUT), cascade); + add_siguser(cascade, dsp_pcin); + add_siguser(cascade, dsp); + + dsp->setParam(ID(ACASCREG), AREG); + dsp_pcin->setParam(ID(A_INPUT), Const("CASCADE")); + + log_debug("ACOUT -> ACIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); + } + if (BREG >= 0) { + Wire *cascade = module->addWire(NEW_ID, 18); + dsp_pcin->setPort(ID(B), Const(0, 18)); + dsp_pcin->setPort(ID(BCIN), cascade); + dsp->setPort(ID(BCOUT), cascade); + add_siguser(cascade, dsp_pcin); + add_siguser(cascade, dsp); + + dsp->setParam(ID(BCASCREG), BREG); + dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE")); + + log_debug("BCOUT -> BCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); + } + } + else { + log_debug(" Blocking %s -> %s cascade (exceeds max: %d)\n", log_id(dsp), log_id(dsp_pcin), MAX_DSP_CASCADE); + } + + dsp = dsp_pcin; + } + + accept; + } +endcode + +// ------------------------------------------------------------------ + +subpattern tail +arg first +arg next + +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused +match nextP + select nextP->type.in(\DSP48E1) + select !param(nextP, \CREG, State::S1).as_bool() + select port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011") + select nusers(port(nextP, \C, SigSpec())) > 1 + select nusers(port(nextP, \PCIN, SigSpec())) == 0 + index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0] + semioptional +endmatch + +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits +match nextP_shift17 + if !nextP + select nextP_shift17->type.in(\DSP48E1) + select !param(nextP_shift17, \CREG, State::S1).as_bool() + select port(nextP_shift17, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011") + select nusers(port(nextP_shift17, \C, SigSpec())) > 1 + select nusers(port(nextP_shift17, \PCIN, SigSpec())) == 0 + index <SigBit> port(nextP_shift17, \C)[0] === port(std::get<0>(chain.back()), \P)[17] + semioptional +endmatch + +code next + next = nextP; + if (!nextP) + next = nextP_shift17; + if (next) { + unextend = [](const SigSpec &sig) { + int i; + for (i = GetSize(sig)-1; i > 0; i--) + if (sig[i] != sig[i-1]) + break; + // Do not remove non-const sign bit + if (sig[i].wire) + ++i; + return sig.extract(0, i); + }; + } +endcode + +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP +code argQ clock AREG + AREG = -1; + if (next) { + Cell *prev = std::get<0>(chain.back()); + if (param(prev, \AREG, 2).as_int() > 0 && + param(next, \AREG, 2).as_int() > 0 && + param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + nusers(port(prev, \ACOUT, SigSpec())) <= 1) { + argQ = unextend(port(next, \A)); + clock = port(prev, \CLK); + subpattern(in_dffe); + if (dff) { + if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0) + goto reject_AREG; + if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) + goto reject_AREG; + if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0) + goto reject_AREG; + if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0)) + goto reject_AREG; + if (dffD == unextend(port(prev, \A))) + AREG = 1; +reject_AREG: ; + } + } + } +endcode + +// (4) Same as (3) but for BCOUT -> BCIN cascade +code argQ clock BREG + BREG = -1; + if (next) { + Cell *prev = std::get<0>(chain.back()); + if (param(prev, \BREG, 2).as_int() > 0 && + param(next, \BREG, 2).as_int() > 0 && + param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + port(next, \BCIN, SigSpec()).is_fully_zero() && + nusers(port(prev, \BCOUT, SigSpec())) <= 1) { + argQ = unextend(port(next, \B)); + clock = port(prev, \CLK); + subpattern(in_dffe); + if (dff) { + if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0) + goto reject_BREG; + if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) + goto reject_BREG; + if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0) + goto reject_BREG; + if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0)) + goto reject_BREG; + if (dffD == unextend(port(prev, \B))) + BREG = 1; +reject_BREG: ; + } + } + } +endcode + +// (5) Recursively go to (2.1) until no more matches possible, recording the +// longest possible chain +code + if (next) { + chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG); + + SigSpec sigC = unextend(port(next, \C)); + + if (nextP_shift17) { + if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) && + port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) + subpattern(tail); + } + else { + if (GetSize(sigC) <= GetSize(port(std::get<0>(chain.back()), \P)) && + port(std::get<0>(chain.back()), \P).extract(0, GetSize(sigC)) != sigC) + subpattern(tail); + + } + } else { + if (GetSize(chain) > GetSize(longest_chain)) + longest_chain = chain; + } +finally + if (next) + chain.pop_back(); +endcode + +// ####################### + +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +subpattern in_dffe +arg argD argQ clock + +code + dff = nullptr; + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant + if (!c.wire) + reject; + // Abandon matches when 'Q' has the keep attribute set + if (c.wire->get_bool_attribute(\keep)) + reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; + } +endcode + +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +match ff + select ff->type.in($dff) + // DSP48E1 does not support clock inversion + select param(ff, \CLK_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index <SigBit> port(ff, \Q)[offset] === argQ[0] + + // Check that the rest of argQ is present + filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) + filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + + filter clock == SigBit() || port(ff, \CLK) == clock + + set ffoffset offset +endmatch + +code argQ argD + SigSpec Q = port(ff, \Q); + dff = ff; + dffclock = port(ff, \CLK); + dffD = argQ; + argD = port(ff, \D); + argQ = Q; + dffD.replace(argQ, argD); + // Only search for ffrstmux if dffD only + // has two (ff, ffrstmux) users + if (nusers(dffD) > 2) + argD = SigSpec(); +endcode + +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero +match ffrstmux + if !argD.empty() + select ffrstmux->type.in($mux) + index <SigSpec> port(ffrstmux, \Y) === argD + + choice <IdString> BA {\B, \A} + // DSP48E1 only supports reset to zero + select port(ffrstmux, BA).is_fully_zero() + + define <bool> pol (BA == \B) + set ffrstpol pol + semioptional +endmatch + +code argD + if (ffrstmux) { + dffrstmux = ffrstmux; + dffrstpol = ffrstpol; + argD = port(ffrstmux, ffrstpol ? \A : \B); + dffD.replace(port(ffrstmux, \Y), argD); + + // Only search for ffcemux if argQ has at + // least 3 users (ff, <upstream>, ffrstmux) and + // dffD only has two (ff, ffrstmux) + if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) + argD = SigSpec(); + } + else + dffrstmux = nullptr; +endcode + +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff +match ffcemux + if !argD.empty() + select ffcemux->type.in($mux) + index <SigSpec> port(ffcemux, \Y) === argD + choice <IdString> AB {\A, \B} + index <SigSpec> port(ffcemux, AB) === argQ + define <bool> pol (AB == \A) + set ffcepol pol + semioptional +endmatch + +code argD + if (ffcemux) { + dffcemux = ffcemux; + dffcepol = ffcepol; + argD = port(ffcemux, ffcepol ? \B : \A); + dffD.replace(port(ffcemux, \Y), argD); + } + else + dffcemux = nullptr; +endcode diff --git a/passes/pmgen/xilinx_srl.pmg b/passes/pmgen/xilinx_srl.pmg index b18119b87..535b3dfdc 100644 --- a/passes/pmgen/xilinx_srl.pmg +++ b/passes/pmgen/xilinx_srl.pmg @@ -13,9 +13,9 @@ endcode match first select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1) select !first->has_keep_attr() - select !first->type.in(\FDRE) || !first->parameters.at(\IS_R_INVERTED, State::S0).as_bool() - select !first->type.in(\FDRE) || !first->parameters.at(\IS_D_INVERTED, State::S0).as_bool() - select !first->type.in(\FDRE, \FDRE_1) || first->connections_.at(\R, State::S0).is_fully_zero() + select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED, State::S0).as_bool() + select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED, State::S0).as_bool() + select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero() filter !non_first_cells.count(first) generate SigSpec C = module->addWire(NEW_ID); @@ -84,9 +84,9 @@ arg en_port match first select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1) select !first->has_keep_attr() - select !first->type.in(\FDRE) || !first->parameters.at(\IS_R_INVERTED, State::S0).as_bool() - select !first->type.in(\FDRE) || !first->parameters.at(\IS_D_INVERTED, State::S0).as_bool() - select !first->type.in(\FDRE, \FDRE_1) || first->connections_.at(\R, State::S0).is_fully_zero() + select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED, State::S0).as_bool() + select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED, State::S0).as_bool() + select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero() endmatch code clk_port en_port @@ -111,10 +111,10 @@ match next index <SigBit> port(next, \Q) === port(first, \D) filter port(next, clk_port) == port(first, clk_port) filter en_port == IdString() || port(next, en_port) == port(first, en_port) - filter !first->type.in(\FDRE) || next->parameters.at(\IS_C_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_C_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE) || next->parameters.at(\IS_D_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_D_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE) || next->parameters.at(\IS_R_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_R_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE, \FDRE_1) || next->connections_.at(\R, State::S0).is_fully_zero() + filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED, State::S0).as_bool() == param(first, \IS_C_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED, State::S0).as_bool() == param(first, \IS_D_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED, State::S0).as_bool() == param(first, \IS_R_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero() endmatch code @@ -138,10 +138,10 @@ match next index <SigBit> port(next, \Q) === port(chain.back(), \D) filter port(next, clk_port) == port(first, clk_port) filter en_port == IdString() || port(next, en_port) == port(first, en_port) - filter !first->type.in(\FDRE) || next->parameters.at(\IS_C_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_C_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE) || next->parameters.at(\IS_D_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_D_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE) || next->parameters.at(\IS_R_INVERTED, State::S0).as_bool() == first->parameters.at(\IS_R_INVERTED, State::S0).as_bool() - filter !first->type.in(\FDRE, \FDRE_1) || next->connections_.at(\R, State::S0).is_fully_zero() + filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED, State::S0).as_bool() == param(first, \IS_C_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED, State::S0).as_bool() == param(first, \IS_D_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED, State::S0).as_bool() == param(first, \IS_R_INVERTED, State::S0).as_bool() + filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero() generate Cell *cell = module->addCell(NEW_ID, chain.back()->type); cell->setPort(\C, chain.back()->getPort(\C)); @@ -149,7 +149,7 @@ generate cell->setPort(\Q, chain.back()->getPort(\D)); if (cell->type == \FDRE) { if (rng(2) == 0) - cell->setPort(\R, chain.back()->connections_.at(\R, State::S0)); + cell->setPort(\R, port(chain.back(), \R, State::S0)); cell->setPort(\CE, chain.back()->getPort(\CE)); } else if (cell->type.begins_with("$_DFFE_")) diff --git a/passes/proc/proc_dlatch.cc b/passes/proc/proc_dlatch.cc index d9d5dfbed..a0c8351b6 100644 --- a/passes/proc/proc_dlatch.cc +++ b/passes/proc/proc_dlatch.cc @@ -349,6 +349,10 @@ void proc_dlatch(proc_dlatch_db_t &db, RTLIL::Process *proc) continue; } + if (proc->get_bool_attribute(ID(always_ff))) + log_error("Found non edge/level sensitive event in always_ff process `%s.%s'.\n", + db.module->name.c_str(), proc->name.c_str()); + for (auto ss : sr->actions) { db.sigmap.apply(ss.first); @@ -383,8 +387,12 @@ void proc_dlatch(proc_dlatch_db_t &db, RTLIL::Process *proc) int offset = 0; for (auto chunk : nolatches_bits.first.chunks()) { SigSpec lhs = chunk, rhs = nolatches_bits.second.extract(offset, chunk.width); - log("No latch inferred for signal `%s.%s' from process `%s.%s'.\n", - db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str()); + if (proc->get_bool_attribute(ID(always_latch))) + log_error("No latch inferred for signal `%s.%s' from always_latch process `%s.%s'.\n", + db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str()); + else + log("No latch inferred for signal `%s.%s' from process `%s.%s'.\n", + db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str()); db.module->connect(lhs, rhs); offset += chunk.width; } @@ -410,8 +418,12 @@ void proc_dlatch(proc_dlatch_db_t &db, RTLIL::Process *proc) cell->set_src_attribute(src); db.generated_dlatches.insert(cell); - log("Latch inferred for signal `%s.%s' from process `%s.%s': %s\n", - db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str(), log_id(cell)); + if (proc->get_bool_attribute(ID(always_comb))) + log_error("Latch inferred for signal `%s.%s' from always_comb process `%s.%s'.\n", + db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str()); + else + log("Latch inferred for signal `%s.%s' from process `%s.%s': %s\n", + db.module->name.c_str(), log_signal(lhs), db.module->name.c_str(), proc->name.c_str(), log_id(cell)); } offset += width; diff --git a/passes/sat/async2sync.cc b/passes/sat/async2sync.cc index 24ae6e448..740248545 100644 --- a/passes/sat/async2sync.cc +++ b/passes/sat/async2sync.cc @@ -198,6 +198,7 @@ struct Async2syncPass : public Pass { module->addMux(NEW_ID, sig_d, new_q, sig_en, sig_q); } + cell->setPort("\\D", sig_q); cell->setPort("\\Q", new_q); cell->unsetPort("\\EN"); cell->unsetParam("\\EN_POLARITY"); diff --git a/passes/techmap/abc9.cc b/passes/techmap/abc9.cc index 7eac08d17..27106cc5d 100644 --- a/passes/techmap/abc9.cc +++ b/passes/techmap/abc9.cc @@ -71,21 +71,21 @@ RTLIL::Module *module; bool clk_polarity, en_polarity; RTLIL::SigSpec clk_sig, en_sig; -inline std::string remap_name(RTLIL::IdString abc_name) +inline std::string remap_name(RTLIL::IdString abc9_name) { - return stringf("$abc$%d$%s", map_autoidx, abc_name.c_str()+1); + return stringf("$abc$%d$%s", map_autoidx, abc9_name.c_str()+1); } void handle_loops(RTLIL::Design *design) { - Pass::call(design, "scc -set_attr abc_scc_id {}"); + Pass::call(design, "scc -set_attr abc9_scc_id {}"); // For every unique SCC found, (arbitrarily) find the first // cell in the component, and select (and mark) all its output // wires pool<RTLIL::Const> ids_seen; for (auto cell : module->cells()) { - auto it = cell->attributes.find(ID(abc_scc_id)); + auto it = cell->attributes.find(ID(abc9_scc_id)); if (it != cell->attributes.end()) { auto r = ids_seen.insert(it->second); if (r.second) { @@ -105,7 +105,7 @@ void handle_loops(RTLIL::Design *design) log_assert(w->port_input); log_assert(b.offset < GetSize(w)); } - w->set_bool_attribute(ID(abc_scc_break)); + w->set_bool_attribute(ID(abc9_scc_break)); module->swap_names(b.wire, w); c.second = RTLIL::SigBit(w, b.offset); } @@ -118,7 +118,7 @@ void handle_loops(RTLIL::Design *design) module->fixup_ports(); } -std::string add_echos_to_abc_cmd(std::string str) +std::string add_echos_to_abc9_cmd(std::string str) { std::string new_str, token; for (size_t i = 0; i < str.size(); i++) { @@ -140,7 +140,7 @@ std::string add_echos_to_abc_cmd(std::string str) return new_str; } -std::string fold_abc_cmd(std::string str) +std::string fold_abc9_cmd(std::string str) { std::string token, new_str = " "; int char_counter = 10; @@ -184,7 +184,7 @@ std::string replace_tempdir(std::string text, std::string tempdir_name, bool sho return text; } -struct abc_output_filter +struct abc9_output_filter { bool got_cr; int escape_seq_state; @@ -192,7 +192,7 @@ struct abc_output_filter std::string tempdir_name; bool show_tempdir; - abc_output_filter(std::string tempdir_name, bool show_tempdir) : tempdir_name(tempdir_name), show_tempdir(show_tempdir) + abc9_output_filter(std::string tempdir_name, bool show_tempdir) : tempdir_name(tempdir_name), show_tempdir(show_tempdir) { got_cr = false; escape_seq_state = 0; @@ -247,7 +247,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri bool cleanup, vector<int> lut_costs, bool dff_mode, std::string clk_str, bool /*keepff*/, std::string delay_target, std::string /*lutin_shared*/, bool fast_mode, bool show_tempdir, std::string box_file, std::string lut_file, - std::string wire_delay, const dict<int,IdString> &box_lookup + std::string wire_delay, const dict<int,IdString> &box_lookup, bool nomfs ) { module = current_module; @@ -293,68 +293,72 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri log_header(design, "Extracting gate netlist of module `%s' to `%s/input.xaig'..\n", module->name.c_str(), replace_tempdir(tempdir_name, tempdir_name, show_tempdir).c_str()); - std::string abc_script; + std::string abc9_script; if (!lut_costs.empty()) { - abc_script += stringf("read_lut %s/lutdefs.txt; ", tempdir_name.c_str()); + abc9_script += stringf("read_lut %s/lutdefs.txt; ", tempdir_name.c_str()); if (!box_file.empty()) - abc_script += stringf("read_box -v %s; ", box_file.c_str()); + abc9_script += stringf("read_box -v %s; ", box_file.c_str()); } else if (!lut_file.empty()) { - abc_script += stringf("read_lut %s; ", lut_file.c_str()); + abc9_script += stringf("read_lut %s; ", lut_file.c_str()); if (!box_file.empty()) - abc_script += stringf("read_box -v %s; ", box_file.c_str()); + abc9_script += stringf("read_box -v %s; ", box_file.c_str()); } else log_abort(); - abc_script += stringf("&read %s/input.xaig; &ps; ", tempdir_name.c_str()); + abc9_script += stringf("&read %s/input.xaig; &ps; ", tempdir_name.c_str()); if (!script_file.empty()) { if (script_file[0] == '+') { for (size_t i = 1; i < script_file.size(); i++) if (script_file[i] == '\'') - abc_script += "'\\''"; + abc9_script += "'\\''"; else if (script_file[i] == ',') - abc_script += " "; + abc9_script += " "; else - abc_script += script_file[i]; + abc9_script += script_file[i]; } else - abc_script += stringf("source %s", script_file.c_str()); + abc9_script += stringf("source %s", script_file.c_str()); } else if (!lut_costs.empty() || !lut_file.empty()) { //bool all_luts_cost_same = true; //for (int this_cost : lut_costs) // if (this_cost != lut_costs.front()) // all_luts_cost_same = false; - abc_script += fast_mode ? ABC_FAST_COMMAND_LUT : ABC_COMMAND_LUT; + abc9_script += fast_mode ? ABC_FAST_COMMAND_LUT : ABC_COMMAND_LUT; //if (all_luts_cost_same && !fast_mode) - // abc_script += "; lutpack {S}"; + // abc9_script += "; lutpack {S}"; } else log_abort(); //if (script_file.empty() && !delay_target.empty()) - // for (size_t pos = abc_script.find("dretime;"); pos != std::string::npos; pos = abc_script.find("dretime;", pos+1)) - // abc_script = abc_script.substr(0, pos) + "dretime; retime -o {D};" + abc_script.substr(pos+8); + // for (size_t pos = abc9_script.find("dretime;"); pos != std::string::npos; pos = abc9_script.find("dretime;", pos+1)) + // abc9_script = abc9_script.substr(0, pos) + "dretime; retime -o {D};" + abc9_script.substr(pos+8); - for (size_t pos = abc_script.find("{D}"); pos != std::string::npos; pos = abc_script.find("{D}", pos)) - abc_script = abc_script.substr(0, pos) + delay_target + abc_script.substr(pos+3); + for (size_t pos = abc9_script.find("{D}"); pos != std::string::npos; pos = abc9_script.find("{D}", pos)) + abc9_script = abc9_script.substr(0, pos) + delay_target + abc9_script.substr(pos+3); - //for (size_t pos = abc_script.find("{S}"); pos != std::string::npos; pos = abc_script.find("{S}", pos)) - // abc_script = abc_script.substr(0, pos) + lutin_shared + abc_script.substr(pos+3); + //for (size_t pos = abc9_script.find("{S}"); pos != std::string::npos; pos = abc9_script.find("{S}", pos)) + // abc9_script = abc9_script.substr(0, pos) + lutin_shared + abc9_script.substr(pos+3); - for (size_t pos = abc_script.find("{W}"); pos != std::string::npos; pos = abc_script.find("{W}", pos)) - abc_script = abc_script.substr(0, pos) + wire_delay + abc_script.substr(pos+3); + for (size_t pos = abc9_script.find("{W}"); pos != std::string::npos; pos = abc9_script.find("{W}", pos)) + abc9_script = abc9_script.substr(0, pos) + wire_delay + abc9_script.substr(pos+3); - abc_script += stringf("; &write %s/output.aig", tempdir_name.c_str()); - abc_script = add_echos_to_abc_cmd(abc_script); + if (nomfs) + for (size_t pos = abc9_script.find("&mfs"); pos != std::string::npos; pos = abc9_script.find("&mfs", pos)) + abc9_script = abc9_script.erase(pos, strlen("&mfs")); - for (size_t i = 0; i+1 < abc_script.size(); i++) - if (abc_script[i] == ';' && abc_script[i+1] == ' ') - abc_script[i+1] = '\n'; + abc9_script += stringf("; &write %s/output.aig", tempdir_name.c_str()); + abc9_script = add_echos_to_abc9_cmd(abc9_script); + + for (size_t i = 0; i+1 < abc9_script.size(); i++) + if (abc9_script[i] == ';' && abc9_script[i+1] == ' ') + abc9_script[i+1] = '\n'; FILE *f = fopen(stringf("%s/abc.script", tempdir_name.c_str()).c_str(), "wt"); - fprintf(f, "%s\n", abc_script.c_str()); + fprintf(f, "%s\n", abc9_script.c_str()); fclose(f); if (dff_mode || !clk_str.empty()) @@ -420,7 +424,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri // the expose operation -- remove them from PO/PI // and re-connecting them back together for (auto wire : module->wires()) { - auto it = wire->attributes.find(ID(abc_scc_break)); + auto it = wire->attributes.find(ID(abc9_scc_break)); if (it != wire->attributes.end()) { wire->attributes.erase(it); log_assert(wire->port_output); @@ -450,28 +454,28 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri log("Running ABC command: %s\n", replace_tempdir(buffer, tempdir_name, show_tempdir).c_str()); #ifndef YOSYS_LINK_ABC - abc_output_filter filt(tempdir_name, show_tempdir); - int ret = run_command(buffer, std::bind(&abc_output_filter::next_line, filt, std::placeholders::_1)); + abc9_output_filter filt(tempdir_name, show_tempdir); + int ret = run_command(buffer, std::bind(&abc9_output_filter::next_line, filt, std::placeholders::_1)); #else // These needs to be mutable, supposedly due to getopt - char *abc_argv[5]; + char *abc9_argv[5]; string tmp_script_name = stringf("%s/abc.script", tempdir_name.c_str()); - abc_argv[0] = strdup(exe_file.c_str()); - abc_argv[1] = strdup("-s"); - abc_argv[2] = strdup("-f"); - abc_argv[3] = strdup(tmp_script_name.c_str()); - abc_argv[4] = 0; - int ret = Abc_RealMain(4, abc_argv); - free(abc_argv[0]); - free(abc_argv[1]); - free(abc_argv[2]); - free(abc_argv[3]); + abc9_argv[0] = strdup(exe_file.c_str()); + abc9_argv[1] = strdup("-s"); + abc9_argv[2] = strdup("-f"); + abc9_argv[3] = strdup(tmp_script_name.c_str()); + abc9_argv[4] = 0; + int ret = Abc_RealMain(4, abc9_argv); + free(abc9_argv[0]); + free(abc9_argv[1]); + free(abc9_argv[2]); + free(abc9_argv[3]); #endif if (ret != 0) log_error("ABC: execution of command \"%s\" failed: return code %d.\n", buffer.c_str(), ret); buffer = stringf("%s/%s", tempdir_name.c_str(), "output.aig"); - ifs.open(buffer); + ifs.open(buffer, std::ifstream::binary); if (ifs.fail()) log_error("Can't open ABC output file `%s'.\n", buffer.c_str()); @@ -513,7 +517,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri signal = std::move(bits); } - dict<IdString, bool> abc_box; + dict<IdString, bool> abc9_box; vector<RTLIL::Cell*> boxes; for (const auto &it : module->cells_) { auto cell = it.second; @@ -521,10 +525,10 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri module->remove(cell); continue; } - auto jt = abc_box.find(cell->type); - if (jt == abc_box.end()) { + auto jt = abc9_box.find(cell->type); + if (jt == abc9_box.end()) { RTLIL::Module* box_module = design->module(cell->type); - jt = abc_box.insert(std::make_pair(cell->type, box_module && box_module->attributes.count(ID(abc_box_id)))).first; + jt = abc9_box.insert(std::make_pair(cell->type, box_module && box_module->attributes.count(ID(abc9_box_id)))).first; } if (jt->second) boxes.emplace_back(cell); @@ -606,7 +610,6 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri existing_cell = module->cell(c->name); log_assert(existing_cell); cell = module->addCell(remap_name(c->name), c->type); - module->swap_names(cell, existing_cell); } if (markgroups) cell->attributes[ID(abcgroup)] = map_autoidx; @@ -642,8 +645,22 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri } } - for (auto cell : boxes) - module->remove(cell); + for (auto existing_cell : boxes) { + Cell *cell = module->cell(remap_name(existing_cell->name)); + if (cell) { + for (auto &conn : existing_cell->connections()) { + if (!conn.second.is_wire()) + continue; + Wire *wire = conn.second.as_wire(); + if (!wire->get_bool_attribute(ID(abc9_padding))) + continue; + cell->unsetPort(conn.first); + log_debug("Dropping padded port connection for %s (%s) .%s (%s )\n", log_id(cell), cell->type.c_str(), log_id(conn.first), log_signal(conn.second)); + } + module->swap_names(cell, existing_cell); + } + module->remove(existing_cell); + } // Copy connections (and rename) from mapped_mod to module for (auto conn : mapped_mod->connections()) { @@ -814,17 +831,17 @@ struct Abc9Pass : public Pass { log(" if no -script parameter is given, the following scripts are used:\n"); log("\n"); log(" for -lut/-luts (only one LUT size):\n"); - log("%s\n", fold_abc_cmd(ABC_COMMAND_LUT /*"; lutpack {S}"*/).c_str()); + log("%s\n", fold_abc9_cmd(ABC_COMMAND_LUT /*"; lutpack {S}"*/).c_str()); log("\n"); log(" for -lut/-luts (different LUT sizes):\n"); - log("%s\n", fold_abc_cmd(ABC_COMMAND_LUT).c_str()); + log("%s\n", fold_abc9_cmd(ABC_COMMAND_LUT).c_str()); log("\n"); log(" -fast\n"); log(" use different default scripts that are slightly faster (at the cost\n"); log(" of output quality):\n"); log("\n"); log(" for -lut/-luts:\n"); - log("%s\n", fold_abc_cmd(ABC_FAST_COMMAND_LUT).c_str()); + log("%s\n", fold_abc9_cmd(ABC_FAST_COMMAND_LUT).c_str()); log("\n"); log(" -D <picoseconds>\n"); log(" set delay target. the string {D} in the default scripts above is\n"); @@ -908,6 +925,7 @@ struct Abc9Pass : public Pass { std::string delay_target, lutin_shared = "-S 1", wire_delay; bool fast_mode = false, dff_mode = false, keepff = false, cleanup = true; bool show_tempdir = false; + bool nomfs = false; vector<int> lut_costs; markgroups = false; @@ -1030,6 +1048,10 @@ struct Abc9Pass : public Pass { wire_delay = "-W " + args[++argidx]; continue; } + if (arg == "-nomfs") { + nomfs = true; + continue; + } break; } extra_args(args, argidx, design); @@ -1044,7 +1066,7 @@ struct Abc9Pass : public Pass { dict<int,IdString> box_lookup; for (auto m : design->modules()) { - auto it = m->attributes.find(ID(abc_box_id)); + auto it = m->attributes.find(ID(abc9_box_id)); if (it == m->attributes.end()) continue; if (m->name.begins_with("$paramod")) @@ -1052,7 +1074,7 @@ struct Abc9Pass : public Pass { auto id = it->second.as_int(); auto r = box_lookup.insert(std::make_pair(id, m->name)); if (!r.second) - log_error("Module '%s' has the same abc_box_id = %d value as '%s'.\n", + log_error("Module '%s' has the same abc9_box_id = %d value as '%s'.\n", log_id(m), id, log_id(r.first->second)); log_assert(r.second); @@ -1060,24 +1082,24 @@ struct Abc9Pass : public Pass { for (auto p : m->ports) { auto w = m->wire(p); log_assert(w); - if (w->attributes.count(ID(abc_carry))) { + if (w->attributes.count(ID(abc9_carry))) { if (w->port_input) { if (carry_in) - log_error("Module '%s' contains more than one 'abc_carry' input port.\n", log_id(m)); + log_error("Module '%s' contains more than one 'abc9_carry' input port.\n", log_id(m)); carry_in = w; } else if (w->port_output) { if (carry_out) - log_error("Module '%s' contains more than one 'abc_carry' input port.\n", log_id(m)); + log_error("Module '%s' contains more than one 'abc9_carry' input port.\n", log_id(m)); carry_out = w; } } } if (carry_in || carry_out) { if (carry_in && !carry_out) - log_error("Module '%s' contains an 'abc_carry' input port but no output port.\n", log_id(m)); + log_error("Module '%s' contains an 'abc9_carry' input port but no output port.\n", log_id(m)); if (!carry_in && carry_out) - log_error("Module '%s' contains an 'abc_carry' output port but no input port.\n", log_id(m)); + log_error("Module '%s' contains an 'abc9_carry' output port but no input port.\n", log_id(m)); // Make carry_in the last PI, and carry_out the last PO // since ABC requires it this way auto &ports = m->ports; @@ -1105,7 +1127,7 @@ struct Abc9Pass : public Pass { for (auto mod : design->selected_modules()) { - if (mod->attributes.count(ID(abc_box_id))) + if (mod->attributes.count(ID(abc9_box_id))) continue; if (mod->processes.size() > 0) { @@ -1118,7 +1140,7 @@ struct Abc9Pass : public Pass { if (!dff_mode || !clk_str.empty()) { abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, dff_mode, clk_str, keepff, delay_target, lutin_shared, fast_mode, show_tempdir, - box_file, lut_file, wire_delay, box_lookup); + box_file, lut_file, wire_delay, box_lookup, nomfs); continue; } @@ -1264,7 +1286,7 @@ struct Abc9Pass : public Pass { en_sig = assign_map(std::get<3>(it.first)); abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, !clk_sig.empty(), "$", keepff, delay_target, lutin_shared, fast_mode, show_tempdir, - box_file, lut_file, wire_delay, box_lookup); + box_file, lut_file, wire_delay, box_lookup, nomfs); assign_map.set(mod); } } diff --git a/passes/techmap/aigmap.cc b/passes/techmap/aigmap.cc index 1d5e1286b..2ecb2f35a 100644 --- a/passes/techmap/aigmap.cc +++ b/passes/techmap/aigmap.cc @@ -27,6 +27,7 @@ struct AigmapPass : public Pass { AigmapPass() : Pass("aigmap", "map logic to and-inverter-graph circuit") { } void help() YS_OVERRIDE { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| log("\n"); log(" aigmap [options] [selection]\n"); log("\n"); @@ -36,10 +37,15 @@ struct AigmapPass : public Pass { log(" -nand\n"); log(" Enable creation of $_NAND_ cells\n"); log("\n"); + log(" -select\n"); + log(" Overwrite replaced cells in the current selection with new $_AND_,\n"); + log(" $_NOT_, and $_NAND_, cells\n"); + + log("\n"); } void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { - bool nand_mode = false; + bool nand_mode = false, select_mode = false; log_header(design, "Executing AIGMAP pass (map logic to AIG).\n"); @@ -50,6 +56,10 @@ struct AigmapPass : public Pass { nand_mode = true; continue; } + if (args[argidx] == "-select") { + select_mode = true; + continue; + } break; } extra_args(args, argidx, design); @@ -62,6 +72,7 @@ struct AigmapPass : public Pass { dict<IdString, int> stat_not_replaced; int orig_num_cells = GetSize(module->cells()); + pool<IdString> new_sel; for (auto cell : module->selected_cells()) { Aig aig(cell); @@ -75,6 +86,8 @@ struct AigmapPass : public Pass { if (aig.name.empty()) { not_replaced_count++; stat_not_replaced[cell->type]++; + if (select_mode) + new_sel.insert(cell->name); continue; } @@ -95,19 +108,33 @@ struct AigmapPass : public Pass { SigBit A = sigs.at(node.left_parent); SigBit B = sigs.at(node.right_parent); if (nand_mode && node.inverter) { - bit = module->NandGate(NEW_ID, A, B); + bit = module->addWire(NEW_ID); + auto gate = module->addNandGate(NEW_ID, A, B, bit); + if (select_mode) + new_sel.insert(gate->name); + goto skip_inverter; } else { pair<int, int> key(node.left_parent, node.right_parent); if (and_cache.count(key)) bit = and_cache.at(key); - else - bit = module->AndGate(NEW_ID, A, B); + else { + bit = module->addWire(NEW_ID); + auto gate = module->addAndGate(NEW_ID, A, B, bit); + if (select_mode) + new_sel.insert(gate->name); + } } } - if (node.inverter) - bit = module->NotGate(NEW_ID, bit); + if (node.inverter) { + SigBit new_bit = module->addWire(NEW_ID); + auto gate = module->addNotGate(NEW_ID, bit, new_bit); + bit = new_bit; + if (select_mode) + new_sel.insert(gate->name); + + } skip_inverter: for (auto &op : node.outports) @@ -142,6 +169,13 @@ struct AigmapPass : public Pass { for (auto cell : replaced_cells) module->remove(cell); + + if (select_mode) { + log_assert(!design->selection_stack.empty()); + RTLIL::Selection& sel = design->selection_stack.back(); + sel.selected_members[module->name] = std::move(new_sel); + } + } } } AigmapPass; diff --git a/passes/techmap/extract_fa.cc b/passes/techmap/extract_fa.cc index 29700c37b..9f3bb525b 100644 --- a/passes/techmap/extract_fa.cc +++ b/passes/techmap/extract_fa.cc @@ -262,10 +262,14 @@ struct ExtractFaWorker pool<SigBit> new_leaves = leaves; new_leaves.erase(bit); - if (cell->hasPort(ID::A)) new_leaves.insert(sigmap(SigBit(cell->getPort(ID::A)))); - if (cell->hasPort(ID::B)) new_leaves.insert(sigmap(SigBit(cell->getPort(ID::B)))); - if (cell->hasPort(ID(C))) new_leaves.insert(sigmap(SigBit(cell->getPort(ID(C))))); - if (cell->hasPort(ID(D))) new_leaves.insert(sigmap(SigBit(cell->getPort(ID(D))))); + for (auto port : {ID::A, ID::B, ID(C), ID(D)}) { + if (!cell->hasPort(port)) + continue; + auto bit = sigmap(SigBit(cell->getPort(port))); + if (!bit.wire) + continue; + new_leaves.insert(bit); + } if (GetSize(new_leaves) > maxbreadth) continue; diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc index 5807178dd..a2ad87f7d 100644 --- a/passes/techmap/flowmap.cc +++ b/passes/techmap/flowmap.cc @@ -394,7 +394,7 @@ struct FlowGraph pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> edge_cut() { - pool<RTLIL::SigBit> x, xi; + pool<RTLIL::SigBit> x = {source}, xi; // X and X̅ in the paper NodePrime source_prime = {source, true}; pool<NodePrime> visited; @@ -437,6 +437,7 @@ struct FlowGraph for (auto collapsed_node : collapsed[sink]) xi.insert(collapsed_node); + log_assert(x[source] && !xi[source]); log_assert(!x[sink] && xi[sink]); return {x, xi}; } @@ -1050,7 +1051,7 @@ struct FlowmapWorker auto cut_inputs = cut_lut_at_gate(lut, lut_gate); pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second; - if (gate_inputs.empty() && (int)other_inputs.size() == order) + if (gate_inputs.empty() && (int)other_inputs.size() >= order) { if (debug_relax) log(" Breaking would result in a (k+1)-LUT.\n"); diff --git a/passes/techmap/techmap.cc b/passes/techmap/techmap.cc index 08a1af2d5..0c57733d4 100644 --- a/passes/techmap/techmap.cc +++ b/passes/techmap/techmap.cc @@ -257,6 +257,12 @@ struct TechmapWorker w->add_strpool_attribute(ID(src), extra_src_attrs); } design->select(module, w); + + if (it.second->name.begins_with("\\_TECHMAP_REPLACE_.")) { + IdString replace_name = stringf("%s%s", orig_cell_name.c_str(), it.second->name.c_str() + strlen("\\_TECHMAP_REPLACE_")); + Wire *replace_w = module->addWire(replace_name, it.second); + module->connect(replace_w, w); + } } SigMap tpl_sigmap(tpl); @@ -378,6 +384,8 @@ struct TechmapWorker if (techmap_replace_cell) c_name = orig_cell_name; + else if (it.second->name.begins_with("\\_TECHMAP_REPLACE_.")) + c_name = stringf("%s%s", orig_cell_name.c_str(), c_name.c_str() + strlen("\\_TECHMAP_REPLACE_")); else apply_prefix(cell->name, c_name); @@ -1198,6 +1206,12 @@ struct TechmapPass : public Pass { log("\n"); log("A cell with the name _TECHMAP_REPLACE_ in the map file will inherit the name\n"); log("and attributes of the cell that is being replaced.\n"); + log("A cell with a name of the form `_TECHMAP_REPLACE_.<suffix>` in the map file will\n"); + log("be named thus but with the `_TECHMAP_REPLACE_' prefix substituted with the name\n"); + log("of the cell being replaced.\n"); + log("Similarly, a wire named in the form `_TECHMAP_REPLACE_.<suffix>` will cause a\n"); + log("new wire alias to be created and named as above but with the `_TECHMAP_REPLACE_'\n"); + log("prefix also substituted.\n"); log("\n"); log("See 'help extract' for a pass that does the opposite thing.\n"); log("\n"); diff --git a/passes/tests/test_autotb.cc b/passes/tests/test_autotb.cc index a5ac3130f..2b6a86c25 100644 --- a/passes/tests/test_autotb.cc +++ b/passes/tests/test_autotb.cc @@ -351,6 +351,11 @@ struct TestAutotbBackend : public Backend { log(" -n <int>\n"); log(" number of iterations the test bench should run (default = 1000)\n"); log("\n"); + log(" -seed <int>\n"); + log(" seed used for pseudo-random number generation (default = 0).\n"); + log(" a value of 0 will cause an arbitrary seed to be chosen, based on\n"); + log(" the current system time.\n"); + log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE { |