From f46ac1df9f8847dac9d9851f2f948d93a1064ff1 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Wed, 2 Oct 2019 16:08:46 -0700 Subject: Be mindful that sigmap(wire) could have dupes when checking \init --- passes/sat/sat.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/sat/sat.cc b/passes/sat/sat.cc index 430bba1e8..93a4f225e 100644 --- a/passes/sat/sat.cc +++ b/passes/sat/sat.cc @@ -265,15 +265,18 @@ struct SatHelper RTLIL::SigSpec rhs = it.second->attributes.at("\\init"); log_assert(lhs.size() == rhs.size()); + dict seen_init; RTLIL::SigSpec removed_bits; for (int i = 0; i < lhs.size(); i++) { RTLIL::SigSpec bit = lhs.extract(i, 1); - if (rhs[i] == State::Sx || !satgen.initial_state.check_all(bit)) { + if (rhs[i] == State::Sx || !satgen.initial_state.check_all(bit) || seen_init.at(bit, rhs[i]) != rhs[i]) { removed_bits.append(bit); lhs.remove(i, 1); rhs.remove(i, 1); i--; } + else + seen_init[bit] = rhs[i]; } if (removed_bits.size()) -- cgit v1.2.3 From d99810ad8a0d56a13cca75c4b129f932491d9c80 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Wed, 2 Oct 2019 17:53:42 -0700 Subject: Refactor peepopt_dffmux and be sensitive to \init when trimming --- passes/pmgen/peepopt_dffmux.pmg | 95 +++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 32 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index c88a52226..2ec504cb4 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -8,21 +8,23 @@ match dff select GetSize(port(dff, \D)) > 1 endmatch +code sigD + sigD = port(dff, \D); +endcode + match rstmux select rstmux->type == $mux select GetSize(port(rstmux, \Y)) > 1 - index port(rstmux, \Y) === port(dff, \D) + index port(rstmux, \Y) === sigD choice BA {\B, \A} select port(rstmux, BA).is_fully_const() set rstmuxBA BA - optional + semioptional endmatch code sigD if (rstmux) sigD = port(rstmux, rstmuxBA == \B ? \A : \B); - else - sigD = port(dff, \D); endcode match cemux @@ -32,45 +34,70 @@ match cemux choice AB {\A, \B} index port(cemux, AB) === port(dff, \Q) set cemuxAB AB + semioptional endmatch code - SigSpec D = port(cemux, cemuxAB == \A ? \B : \A); - SigSpec Q = port(dff, \Q); + if (!cemux && !rstmux) + reject; +endcode + +code Const rst; - if (rstmux) + SigSpec D; + if (cemux) { + D = port(cemux, cemuxAB == \A ? \B : \A); + if (rstmux) + rst = port(rstmux, rstmuxBA).as_const(); + else + rst = Const(State::Sx, GetSize(D)); + } + else { + log_assert(rstmux); + D = port(rstmux, rstmuxBA == \B ? \A : \B); rst = port(rstmux, rstmuxBA).as_const(); + } + SigSpec Q = port(dff, \Q); int width = GetSize(D); - SigSpec &ceA = cemux->connections_.at(\A); - SigSpec &ceB = cemux->connections_.at(\B); - SigSpec &ceY = cemux->connections_.at(\Y); SigSpec &dffD = dff->connections_.at(\D); SigSpec &dffQ = dff->connections_.at(\Q); + Const init; + for (const auto &b : Q) { + auto it = b.wire->attributes.find(\init); + init.bits.push_back(it == b.wire->attributes.end() ? State::Sx : it->second[b.offset]); + } - if (D[width-1] == D[width-2]) { - did_something = true; + auto cmpx = [=](State lhs, State rhs) { + if (lhs == State::Sx || rhs == State::Sx) + return true; + return lhs == rhs; + }; - SigBit sign = D[width-1]; - bool is_signed = sign.wire; - int i; - for (i = width-1; i >= 2; i--) { - if (!is_signed) { - module->connect(Q[i], sign); - if (D[i-1] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } - else { - module->connect(Q[i], Q[i-1]); - if (D[i-2] != sign || (rst.size() && rst[i-1] != rst[width-1])) - break; - } + int i = width; + while (i > 2) { + i--; + if (D[i] != D[i-1]) + break; + if (!cmpx(rst[i], rst[i-1])) + break; + if (!cmpx(init[i], init[i-1])) + break; + if (!cmpx(rst[i], init[i])) + break; + module->connect(Q[i], Q[i-1]); + did_something = true; + } + if (i < width-1) { + if (cemux) { + SigSpec &ceA = cemux->connections_.at(\A); + SigSpec &ceB = cemux->connections_.at(\B); + SigSpec &ceY = cemux->connections_.at(\Y); + ceA.remove(i, width-i); + ceB.remove(i, width-i); + ceY.remove(i, width-i); + cemux->fixup_parameters(); } - - ceA.remove(i, width-i); - ceB.remove(i, width-i); - ceY.remove(i, width-i); - cemux->fixup_parameters(); dffD.remove(i, width-i); dffQ.remove(i, width-i); dff->fixup_parameters(); @@ -78,7 +105,11 @@ code log("dffcemux pattern in %s: dff=%s, cemux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux), width-i); accept; } - else { + else if (cemux) { + SigSpec &ceA = cemux->connections_.at(\A); + SigSpec &ceB = cemux->connections_.at(\B); + SigSpec &ceY = cemux->connections_.at(\Y); + int count = 0; for (int i = width-1; i >= 0; i--) { if (D[i].wire) -- cgit v1.2.3 From e9645c7fa7fc349afad103ff8736699bb4dc0412 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Wed, 2 Oct 2019 21:26:26 -0700 Subject: Fix broken CI, check reset even for constants, trim rstmux --- passes/pmgen/peepopt_dffmux.pmg | 49 ++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index 2ec504cb4..bfd155c58 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -74,9 +74,9 @@ code return lhs == rhs; }; - int i = width; - while (i > 2) { - i--; + int i = width-1; + while (i > 1) { + log_dump(i, D[i], D[i-1], rst[i], rst[i-1], init[i], init[i-1]); if (D[i] != D[i-1]) break; if (!cmpx(rst[i], rst[i-1])) @@ -86,26 +86,36 @@ code if (!cmpx(rst[i], init[i])) break; module->connect(Q[i], Q[i-1]); - did_something = true; + i--; } if (i < width-1) { + did_something = true; if (cemux) { SigSpec &ceA = cemux->connections_.at(\A); SigSpec &ceB = cemux->connections_.at(\B); SigSpec &ceY = cemux->connections_.at(\Y); - ceA.remove(i, width-i); - ceB.remove(i, width-i); - ceY.remove(i, width-i); + ceA.remove(i, width-1-i); + ceB.remove(i, width-1-i); + ceY.remove(i, width-1-i); cemux->fixup_parameters(); } - dffD.remove(i, width-i); - dffQ.remove(i, width-i); + if (rstmux) { + SigSpec &rstA = rstmux->connections_.at(\A); + SigSpec &rstB = rstmux->connections_.at(\B); + SigSpec &rstY = rstmux->connections_.at(\Y); + rstA.remove(i, width-1-i); + rstB.remove(i, width-1-i); + rstY.remove(i, width-1-i); + rstmux->fixup_parameters(); + } + dffD.remove(i, width-1-i); + dffQ.remove(i, width-1-i); dff->fixup_parameters(); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux), width-i); - accept; + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i); + width = i+1; } - else if (cemux) { + if (cemux) { SigSpec &ceA = cemux->connections_.at(\A); SigSpec &ceB = cemux->connections_.at(\B); SigSpec &ceY = cemux->connections_.at(\Y); @@ -114,15 +124,7 @@ code for (int i = width-1; i >= 0; i--) { if (D[i].wire) continue; - Wire *w = Q[i].wire; - auto it = w->attributes.find(\init); - State init; - if (it != w->attributes.end()) - init = it->second[Q[i].offset]; - else - init = State::Sx; - - if (init == State::Sx || init == D[i].data) { + if (cmpx(rst[i], D[i].data) && cmpx(init[i], D[i].data)) { count++; module->connect(Q[i], D[i]); ceA.remove(i); @@ -136,9 +138,10 @@ code did_something = true; cemux->fixup_parameters(); dff->fixup_parameters(); - log("dffcemux pattern in %s: dff=%s, cemux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), count); + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count); } + } + if (did_something) accept; - } endcode -- cgit v1.2.3 From c6d15c9aade55a87595693ecb9170ae8b595e28c Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Thu, 3 Oct 2019 10:07:03 -0700 Subject: Revert "Update doc for equiv_opt" This reverts commit a274b7cc86d4f64541d3d2903b4eeed4616ab1d8. --- passes/equiv/equiv_opt.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index 4ab5b1a3e..9fe3bbd57 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -32,8 +32,7 @@ struct EquivOptPass:public ScriptPass log("\n"); log(" equiv_opt [options] [command]\n"); log("\n"); - log("This command uses temporal induction to check circuit equivalence before and\n"); - log("after an optimization pass.\n"); + log("This command checks circuit equivalence before and after an optimization pass.\n"); log("\n"); log(" -run :\n"); log(" only run the commands between the labels (see below). an empty\n"); @@ -157,7 +156,7 @@ struct EquivOptPass:public ScriptPass if (check_label("prove")) { if (multiclock || help_mode) run("clk2fflogic", "(only with -multiclock)"); - if (!multiclock || help_mode) + else run("async2sync", "(only without -multiclock)"); run("equiv_make gold gate equiv"); if (help_mode) -- cgit v1.2.3 From 8765ec3c27f38e6fb57d057be9605788e144388b Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Thu, 3 Oct 2019 10:07:15 -0700 Subject: Revert "equiv_opt to call async2sync when not -multiclock like SymbiYosys" This reverts commit a39505e329cc05dbd4ad624a1cf0f6caf664fd9a. --- passes/equiv/equiv_opt.cc | 2 -- 1 file changed, 2 deletions(-) (limited to 'passes') diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index 9fe3bbd57..d4c7f7953 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -156,8 +156,6 @@ struct EquivOptPass:public ScriptPass if (check_label("prove")) { if (multiclock || help_mode) run("clk2fflogic", "(only with -multiclock)"); - else - run("async2sync", "(only without -multiclock)"); run("equiv_make gold gate equiv"); if (help_mode) run("equiv_induct [-undef] equiv"); -- cgit v1.2.3 From 7a6dec1cef9c6a44dafe83d884abaf06dc77ab07 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Thu, 3 Oct 2019 10:30:51 -0700 Subject: Add new -async2sync option --- passes/equiv/equiv_opt.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index d4c7f7953..d13e46ce4 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -58,7 +58,7 @@ struct EquivOptPass:public ScriptPass } std::string command, techmap_opts; - bool assert, undef, multiclock; + bool assert, undef, multiclock, async2sync; void clear_flags() YS_OVERRIDE { @@ -67,6 +67,7 @@ struct EquivOptPass:public ScriptPass assert = false; undef = false; multiclock = false; + async2sync = false; } void execute(std::vector < std::string > args, RTLIL::Design * design) YS_OVERRIDE @@ -100,6 +101,10 @@ struct EquivOptPass:public ScriptPass multiclock = true; continue; } + if (args[argidx] == "-async2sync") { + async2sync = true; + continue; + } break; } @@ -119,6 +124,9 @@ struct EquivOptPass:public ScriptPass if (!design->full_selection()) log_cmd_error("This command only operates on fully selected designs!\n"); + if (async2sync && multiclock) + log_cmd_error("The '-async2sync' and '-multiclock' options are mutually exclusive!\n"); + log_header(design, "Executing EQUIV_OPT pass.\n"); log_push(); @@ -156,6 +164,8 @@ struct EquivOptPass:public ScriptPass if (check_label("prove")) { if (multiclock || help_mode) run("clk2fflogic", "(only with -multiclock)"); + if (async2sync || help_mode) + run("async2sync", "(only with -async2sync)"); run("equiv_make gold gate equiv"); if (help_mode) run("equiv_induct [-undef] equiv"); -- cgit v1.2.3 From a9efd2e81cd502665ee034f64c85b11e34dfd9bb Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Thu, 3 Oct 2019 10:51:53 -0700 Subject: Restore part of doc --- passes/equiv/equiv_opt.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index d13e46ce4..ec1200488 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -32,7 +32,8 @@ struct EquivOptPass:public ScriptPass log("\n"); log(" equiv_opt [options] [command]\n"); log("\n"); - log("This command checks circuit equivalence before and after an optimization pass.\n"); + log("This command uses temporal induction to check circuit equivalence before and\n"); + log("after an optimization pass.\n"); log("\n"); log(" -run :\n"); log(" only run the commands between the labels (see below). an empty\n"); -- cgit v1.2.3 From c0b14cfea7c5650ddbc28d69de4749f845954dc7 Mon Sep 17 00:00:00 2001 From: Miodrag Milanovic Date: Fri, 4 Oct 2019 16:29:46 +0200 Subject: Fixes for MSVC build --- passes/pmgen/xilinx_dsp.cc | 1 + 1 file changed, 1 insertion(+) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 11c7e5ea8..3ff921957 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -20,6 +20,7 @@ #include "kernel/yosys.h" #include "kernel/sigtools.h" +#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -- cgit v1.2.3 From 84f978bdc20494167a6a2c5f654b96c4f565a5e0 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 10:17:46 -0700 Subject: Add -async2sync to help text as per @daveshah1 --- passes/equiv/equiv_opt.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/equiv/equiv_opt.cc b/passes/equiv/equiv_opt.cc index ec1200488..c7e6d71a6 100644 --- a/passes/equiv/equiv_opt.cc +++ b/passes/equiv/equiv_opt.cc @@ -50,6 +50,9 @@ struct EquivOptPass:public ScriptPass log(" -multiclock\n"); log(" run clk2fflogic before equivalence checking.\n"); log("\n"); + log(" -async2sync\n"); + log(" run async2sync before equivalence checking.\n"); + log("\n"); log(" -undef\n"); log(" enable modelling of undef states during equiv_induct.\n"); log("\n"); @@ -166,7 +169,7 @@ struct EquivOptPass:public ScriptPass if (multiclock || help_mode) run("clk2fflogic", "(only with -multiclock)"); if (async2sync || help_mode) - run("async2sync", "(only with -async2sync)"); + run("async2sync", " (only with -async2sync)"); run("equiv_make gold gate equiv"); if (help_mode) run("equiv_induct [-undef] equiv"); -- cgit v1.2.3 From aae2b9fd9c8dc915fadacc24962436dd7aedff36 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 11:04:10 -0700 Subject: Rename abc_* names/attributes to more precisely be abc9_* --- passes/techmap/abc9.cc | 130 ++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'passes') diff --git a/passes/techmap/abc9.cc b/passes/techmap/abc9.cc index 09d6e9670..1ebdaa29e 100644 --- a/passes/techmap/abc9.cc +++ b/passes/techmap/abc9.cc @@ -71,21 +71,21 @@ RTLIL::Module *module; bool clk_polarity, en_polarity; RTLIL::SigSpec clk_sig, en_sig; -inline std::string remap_name(RTLIL::IdString abc_name) +inline std::string remap_name(RTLIL::IdString abc9_name) { - return stringf("$abc$%d$%s", map_autoidx, abc_name.c_str()+1); + return stringf("$abc$%d$%s", map_autoidx, abc9_name.c_str()+1); } void handle_loops(RTLIL::Design *design) { - Pass::call(design, "scc -set_attr abc_scc_id {}"); + Pass::call(design, "scc -set_attr abc9_scc_id {}"); // For every unique SCC found, (arbitrarily) find the first // cell in the component, and select (and mark) all its output // wires pool ids_seen; for (auto cell : module->cells()) { - auto it = cell->attributes.find(ID(abc_scc_id)); + auto it = cell->attributes.find(ID(abc9_scc_id)); if (it != cell->attributes.end()) { auto r = ids_seen.insert(it->second); if (r.second) { @@ -105,7 +105,7 @@ void handle_loops(RTLIL::Design *design) log_assert(w->port_input); log_assert(b.offset < GetSize(w)); } - w->set_bool_attribute(ID(abc_scc_break)); + w->set_bool_attribute(ID(abc9_scc_break)); module->swap_names(b.wire, w); c.second = RTLIL::SigBit(w, b.offset); } @@ -118,7 +118,7 @@ void handle_loops(RTLIL::Design *design) module->fixup_ports(); } -std::string add_echos_to_abc_cmd(std::string str) +std::string add_echos_to_abc9_cmd(std::string str) { std::string new_str, token; for (size_t i = 0; i < str.size(); i++) { @@ -140,7 +140,7 @@ std::string add_echos_to_abc_cmd(std::string str) return new_str; } -std::string fold_abc_cmd(std::string str) +std::string fold_abc9_cmd(std::string str) { std::string token, new_str = " "; int char_counter = 10; @@ -184,7 +184,7 @@ std::string replace_tempdir(std::string text, std::string tempdir_name, bool sho return text; } -struct abc_output_filter +struct abc9_output_filter { bool got_cr; int escape_seq_state; @@ -192,7 +192,7 @@ struct abc_output_filter std::string tempdir_name; bool show_tempdir; - abc_output_filter(std::string tempdir_name, bool show_tempdir) : tempdir_name(tempdir_name), show_tempdir(show_tempdir) + abc9_output_filter(std::string tempdir_name, bool show_tempdir) : tempdir_name(tempdir_name), show_tempdir(show_tempdir) { got_cr = false; escape_seq_state = 0; @@ -293,68 +293,68 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri log_header(design, "Extracting gate netlist of module `%s' to `%s/input.xaig'..\n", module->name.c_str(), replace_tempdir(tempdir_name, tempdir_name, show_tempdir).c_str()); - std::string abc_script; + std::string abc9_script; if (!lut_costs.empty()) { - abc_script += stringf("read_lut %s/lutdefs.txt; ", tempdir_name.c_str()); + abc9_script += stringf("read_lut %s/lutdefs.txt; ", tempdir_name.c_str()); if (!box_file.empty()) - abc_script += stringf("read_box -v %s; ", box_file.c_str()); + abc9_script += stringf("read_box -v %s; ", box_file.c_str()); } else if (!lut_file.empty()) { - abc_script += stringf("read_lut %s; ", lut_file.c_str()); + abc9_script += stringf("read_lut %s; ", lut_file.c_str()); if (!box_file.empty()) - abc_script += stringf("read_box -v %s; ", box_file.c_str()); + abc9_script += stringf("read_box -v %s; ", box_file.c_str()); } else log_abort(); - abc_script += stringf("&read %s/input.xaig; &ps; ", tempdir_name.c_str()); + abc9_script += stringf("&read %s/input.xaig; &ps; ", tempdir_name.c_str()); if (!script_file.empty()) { if (script_file[0] == '+') { for (size_t i = 1; i < script_file.size(); i++) if (script_file[i] == '\'') - abc_script += "'\\''"; + abc9_script += "'\\''"; else if (script_file[i] == ',') - abc_script += " "; + abc9_script += " "; else - abc_script += script_file[i]; + abc9_script += script_file[i]; } else - abc_script += stringf("source %s", script_file.c_str()); + abc9_script += stringf("source %s", script_file.c_str()); } else if (!lut_costs.empty() || !lut_file.empty()) { //bool all_luts_cost_same = true; //for (int this_cost : lut_costs) // if (this_cost != lut_costs.front()) // all_luts_cost_same = false; - abc_script += fast_mode ? ABC_FAST_COMMAND_LUT : ABC_COMMAND_LUT; + abc9_script += fast_mode ? ABC_FAST_COMMAND_LUT : ABC_COMMAND_LUT; //if (all_luts_cost_same && !fast_mode) - // abc_script += "; lutpack {S}"; + // abc9_script += "; lutpack {S}"; } else log_abort(); //if (script_file.empty() && !delay_target.empty()) - // for (size_t pos = abc_script.find("dretime;"); pos != std::string::npos; pos = abc_script.find("dretime;", pos+1)) - // abc_script = abc_script.substr(0, pos) + "dretime; retime -o {D};" + abc_script.substr(pos+8); + // for (size_t pos = abc9_script.find("dretime;"); pos != std::string::npos; pos = abc9_script.find("dretime;", pos+1)) + // abc9_script = abc9_script.substr(0, pos) + "dretime; retime -o {D};" + abc9_script.substr(pos+8); - for (size_t pos = abc_script.find("{D}"); pos != std::string::npos; pos = abc_script.find("{D}", pos)) - abc_script = abc_script.substr(0, pos) + delay_target + abc_script.substr(pos+3); + for (size_t pos = abc9_script.find("{D}"); pos != std::string::npos; pos = abc9_script.find("{D}", pos)) + abc9_script = abc9_script.substr(0, pos) + delay_target + abc9_script.substr(pos+3); - //for (size_t pos = abc_script.find("{S}"); pos != std::string::npos; pos = abc_script.find("{S}", pos)) - // abc_script = abc_script.substr(0, pos) + lutin_shared + abc_script.substr(pos+3); + //for (size_t pos = abc9_script.find("{S}"); pos != std::string::npos; pos = abc9_script.find("{S}", pos)) + // abc9_script = abc9_script.substr(0, pos) + lutin_shared + abc9_script.substr(pos+3); - for (size_t pos = abc_script.find("{W}"); pos != std::string::npos; pos = abc_script.find("{W}", pos)) - abc_script = abc_script.substr(0, pos) + wire_delay + abc_script.substr(pos+3); + for (size_t pos = abc9_script.find("{W}"); pos != std::string::npos; pos = abc9_script.find("{W}", pos)) + abc9_script = abc9_script.substr(0, pos) + wire_delay + abc9_script.substr(pos+3); - abc_script += stringf("; &write %s/output.aig", tempdir_name.c_str()); - abc_script = add_echos_to_abc_cmd(abc_script); + abc9_script += stringf("; &write %s/output.aig", tempdir_name.c_str()); + abc9_script = add_echos_to_abc9_cmd(abc9_script); - for (size_t i = 0; i+1 < abc_script.size(); i++) - if (abc_script[i] == ';' && abc_script[i+1] == ' ') - abc_script[i+1] = '\n'; + for (size_t i = 0; i+1 < abc9_script.size(); i++) + if (abc9_script[i] == ';' && abc9_script[i+1] == ' ') + abc9_script[i+1] = '\n'; FILE *f = fopen(stringf("%s/abc.script", tempdir_name.c_str()).c_str(), "wt"); - fprintf(f, "%s\n", abc_script.c_str()); + fprintf(f, "%s\n", abc9_script.c_str()); fclose(f); if (dff_mode || !clk_str.empty()) @@ -420,7 +420,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri // the expose operation -- remove them from PO/PI // and re-connecting them back together for (auto wire : module->wires()) { - auto it = wire->attributes.find(ID(abc_scc_break)); + auto it = wire->attributes.find(ID(abc9_scc_break)); if (it != wire->attributes.end()) { wire->attributes.erase(it); log_assert(wire->port_output); @@ -450,22 +450,22 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri log("Running ABC command: %s\n", replace_tempdir(buffer, tempdir_name, show_tempdir).c_str()); #ifndef YOSYS_LINK_ABC - abc_output_filter filt(tempdir_name, show_tempdir); - int ret = run_command(buffer, std::bind(&abc_output_filter::next_line, filt, std::placeholders::_1)); + abc9_output_filter filt(tempdir_name, show_tempdir); + int ret = run_command(buffer, std::bind(&abc9_output_filter::next_line, filt, std::placeholders::_1)); #else // These needs to be mutable, supposedly due to getopt - char *abc_argv[5]; + char *abc9_argv[5]; string tmp_script_name = stringf("%s/abc.script", tempdir_name.c_str()); - abc_argv[0] = strdup(exe_file.c_str()); - abc_argv[1] = strdup("-s"); - abc_argv[2] = strdup("-f"); - abc_argv[3] = strdup(tmp_script_name.c_str()); - abc_argv[4] = 0; - int ret = Abc_RealMain(4, abc_argv); - free(abc_argv[0]); - free(abc_argv[1]); - free(abc_argv[2]); - free(abc_argv[3]); + abc9_argv[0] = strdup(exe_file.c_str()); + abc9_argv[1] = strdup("-s"); + abc9_argv[2] = strdup("-f"); + abc9_argv[3] = strdup(tmp_script_name.c_str()); + abc9_argv[4] = 0; + int ret = Abc_RealMain(4, abc9_argv); + free(abc9_argv[0]); + free(abc9_argv[1]); + free(abc9_argv[2]); + free(abc9_argv[3]); #endif if (ret != 0) log_error("ABC: execution of command \"%s\" failed: return code %d.\n", buffer.c_str(), ret); @@ -513,7 +513,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri signal = std::move(bits); } - dict abc_box; + dict abc9_box; vector boxes; for (const auto &it : module->cells_) { auto cell = it.second; @@ -521,10 +521,10 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri module->remove(cell); continue; } - auto jt = abc_box.find(cell->type); - if (jt == abc_box.end()) { + auto jt = abc9_box.find(cell->type); + if (jt == abc9_box.end()) { RTLIL::Module* box_module = design->module(cell->type); - jt = abc_box.insert(std::make_pair(cell->type, box_module && box_module->attributes.count(ID(abc_box_id)))).first; + jt = abc9_box.insert(std::make_pair(cell->type, box_module && box_module->attributes.count(ID(abc9_box_id)))).first; } if (jt->second) boxes.emplace_back(cell); @@ -648,7 +648,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri if (!conn.second.is_wire()) continue; Wire *wire = conn.second.as_wire(); - if (!wire->get_bool_attribute(ID(abc_padding))) + if (!wire->get_bool_attribute(ID(abc9_padding))) continue; cell->unsetPort(conn.first); log_debug("Dropping padded port connection for %s (%s) .%s (%s )\n", log_id(cell), cell->type.c_str(), log_id(conn.first), log_signal(conn.second)); @@ -827,17 +827,17 @@ struct Abc9Pass : public Pass { log(" if no -script parameter is given, the following scripts are used:\n"); log("\n"); log(" for -lut/-luts (only one LUT size):\n"); - log("%s\n", fold_abc_cmd(ABC_COMMAND_LUT /*"; lutpack {S}"*/).c_str()); + log("%s\n", fold_abc9_cmd(ABC_COMMAND_LUT /*"; lutpack {S}"*/).c_str()); log("\n"); log(" for -lut/-luts (different LUT sizes):\n"); - log("%s\n", fold_abc_cmd(ABC_COMMAND_LUT).c_str()); + log("%s\n", fold_abc9_cmd(ABC_COMMAND_LUT).c_str()); log("\n"); log(" -fast\n"); log(" use different default scripts that are slightly faster (at the cost\n"); log(" of output quality):\n"); log("\n"); log(" for -lut/-luts:\n"); - log("%s\n", fold_abc_cmd(ABC_FAST_COMMAND_LUT).c_str()); + log("%s\n", fold_abc9_cmd(ABC_FAST_COMMAND_LUT).c_str()); log("\n"); log(" -D \n"); log(" set delay target. the string {D} in the default scripts above is\n"); @@ -1057,7 +1057,7 @@ struct Abc9Pass : public Pass { dict box_lookup; for (auto m : design->modules()) { - auto it = m->attributes.find(ID(abc_box_id)); + auto it = m->attributes.find(ID(abc9_box_id)); if (it == m->attributes.end()) continue; if (m->name.begins_with("$paramod")) @@ -1065,7 +1065,7 @@ struct Abc9Pass : public Pass { auto id = it->second.as_int(); auto r = box_lookup.insert(std::make_pair(id, m->name)); if (!r.second) - log_error("Module '%s' has the same abc_box_id = %d value as '%s'.\n", + log_error("Module '%s' has the same abc9_box_id = %d value as '%s'.\n", log_id(m), id, log_id(r.first->second)); log_assert(r.second); @@ -1073,24 +1073,24 @@ struct Abc9Pass : public Pass { for (auto p : m->ports) { auto w = m->wire(p); log_assert(w); - if (w->attributes.count(ID(abc_carry))) { + if (w->attributes.count(ID(abc9_carry))) { if (w->port_input) { if (carry_in) - log_error("Module '%s' contains more than one 'abc_carry' input port.\n", log_id(m)); + log_error("Module '%s' contains more than one 'abc9_carry' input port.\n", log_id(m)); carry_in = w; } else if (w->port_output) { if (carry_out) - log_error("Module '%s' contains more than one 'abc_carry' input port.\n", log_id(m)); + log_error("Module '%s' contains more than one 'abc9_carry' input port.\n", log_id(m)); carry_out = w; } } } if (carry_in || carry_out) { if (carry_in && !carry_out) - log_error("Module '%s' contains an 'abc_carry' input port but no output port.\n", log_id(m)); + log_error("Module '%s' contains an 'abc9_carry' input port but no output port.\n", log_id(m)); if (!carry_in && carry_out) - log_error("Module '%s' contains an 'abc_carry' output port but no input port.\n", log_id(m)); + log_error("Module '%s' contains an 'abc9_carry' output port but no input port.\n", log_id(m)); // Make carry_in the last PI, and carry_out the last PO // since ABC requires it this way auto &ports = m->ports; @@ -1118,7 +1118,7 @@ struct Abc9Pass : public Pass { for (auto mod : design->selected_modules()) { - if (mod->attributes.count(ID(abc_box_id))) + if (mod->attributes.count(ID(abc9_box_id))) continue; if (mod->processes.size() > 0) { -- cgit v1.2.3 From 74ef8feeaf63b41e8948ce09d40420ccdb48957a Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 16:46:15 -0700 Subject: Fix xilinx_dsp for unsigned extensions --- passes/pmgen/xilinx_dsp.pmg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 3d0b1f2c3..4e174e753 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -277,7 +277,9 @@ match postAdd index port(postAdd, AB)[0] === sigP[0] filter GetSize(port(postAdd, AB)) >= GetSize(sigP) filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP - filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) + // Check that remainder of AB is a sign-extension + define AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool()) + filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) set postAddAB AB optional endmatch -- cgit v1.2.3 From 0acc51c3d82f65f73fa9e475c6fc41beabd925a6 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 17:35:43 -0700 Subject: Add temporary `abc9 -nomfs` and use for `synth_xilinx -abc9` --- passes/techmap/abc9.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/techmap/abc9.cc b/passes/techmap/abc9.cc index 09d6e9670..8932e860a 100644 --- a/passes/techmap/abc9.cc +++ b/passes/techmap/abc9.cc @@ -247,7 +247,7 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri bool cleanup, vector lut_costs, bool dff_mode, std::string clk_str, bool /*keepff*/, std::string delay_target, std::string /*lutin_shared*/, bool fast_mode, bool show_tempdir, std::string box_file, std::string lut_file, - std::string wire_delay, const dict &box_lookup + std::string wire_delay, const dict &box_lookup, bool nomfs ) { module = current_module; @@ -346,6 +346,11 @@ void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::stri for (size_t pos = abc_script.find("{W}"); pos != std::string::npos; pos = abc_script.find("{W}", pos)) abc_script = abc_script.substr(0, pos) + wire_delay + abc_script.substr(pos+3); + if (nomfs) + for (size_t pos = abc_script.find("&mfs"); pos != std::string::npos; pos = abc_script.find("&mfs", pos)) + abc_script = abc_script.erase(pos, strlen("&mfs")); + + abc_script += stringf("; &write %s/output.aig", tempdir_name.c_str()); abc_script = add_echos_to_abc_cmd(abc_script); @@ -921,6 +926,7 @@ struct Abc9Pass : public Pass { std::string delay_target, lutin_shared = "-S 1", wire_delay; bool fast_mode = false, dff_mode = false, keepff = false, cleanup = true; bool show_tempdir = false; + bool nomfs = false; vector lut_costs; markgroups = false; @@ -1043,6 +1049,10 @@ struct Abc9Pass : public Pass { wire_delay = "-W " + args[++argidx]; continue; } + if (arg == "-nomfs") { + nomfs = true; + continue; + } break; } extra_args(args, argidx, design); @@ -1131,7 +1141,7 @@ struct Abc9Pass : public Pass { if (!dff_mode || !clk_str.empty()) { abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, dff_mode, clk_str, keepff, delay_target, lutin_shared, fast_mode, show_tempdir, - box_file, lut_file, wire_delay, box_lookup); + box_file, lut_file, wire_delay, box_lookup, nomfs); continue; } @@ -1277,7 +1287,7 @@ struct Abc9Pass : public Pass { en_sig = assign_map(std::get<3>(it.first)); abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, !clk_sig.empty(), "$", keepff, delay_target, lutin_shared, fast_mode, show_tempdir, - box_file, lut_file, wire_delay, box_lookup); + box_file, lut_file, wire_delay, box_lookup, nomfs); assign_map.set(mod); } } -- cgit v1.2.3 From cf82b38478e598c915d14d595b554fc122034850 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:40:34 -0700 Subject: Add comments for xilinx_dsp --- passes/pmgen/xilinx_dsp.cc | 14 ++++-- passes/pmgen/xilinx_dsp.pmg | 93 +++++++++++++++++++++++++++++++++++++++- passes/pmgen/xilinx_dsp_CREG.pmg | 33 +++++++++++++- 3 files changed, 134 insertions(+), 6 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 11c7e5ea8..489887207 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -608,8 +608,13 @@ struct XilinxDspPass : public Pass { extra_args(args, argidx, design); for (auto module : design->selected_modules()) { + // Experimental feature: pack $add/$sub cells with + // (* use_dsp48="simd" *) into DSP48E1's using its + // SIMD feature xilinx_simd_pack(module, module->selected_cells()); + // Match for all features ([ABDMP][12]?REG, pre-adder, + // (post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); @@ -618,14 +623,17 @@ struct XilinxDspPass : public Pass { // is no guarantee that the cell ordering corresponds // to the "expected" case (i.e. the order in which // they appear in the source) thus the possiblity - // existed that a register got packed as CREG into a + // existed that a register got packed as a CREG into a // downstream DSP that should have otherwise been a - // PREG of an upstream DSP that had not been pattern - // matched yet + // PREG of an upstream DSP that had not been visited + // yet { xilinx_dsp_CREG_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_packC(xilinx_dsp_packC); } + // Lastly, identify and utilise PCOUT -> PCIN, + // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade + // chains { xilinx_dsp_cascade_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_cascade(); diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 4e174e753..bcf966a8a 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -1,3 +1,53 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// ( 1) Starting from a DSP48E1 cell +// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If ADREG matched, treat 'A' input as input of ADREG +// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// If A2REG, then match 'A' input for A1REG +// ( 5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached +// to the 'P' output where it is compared to a constant that is a +// power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function +// Notes: +// - The intention of this pattern matcher is for it to be compatible with +// DSP48E1 cells inferred from multiply operations by Yosys, as well as for +// user instantiations that may already contain the cells being packed... +// (though the latter is currently untested) +// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each +// *REG match, it has been factored out into two subpatterns: in_dffe +// out_dffe located at the bottom of this file + pattern xilinx_dsp_pack state clock @@ -5,12 +55,11 @@ state sigA sigB sigC sigD sigM sigP state postAddAB postAddMuxAB state ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol - state ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset @@ -19,6 +68,7 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch @@ -53,6 +103,7 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; + // TODO: Check if necessary // This sigM could have no users if downstream $add // is narrower than $mul result, for example if (sigM.empty()) @@ -61,6 +112,10 @@ code sigA sigB sigC sigD sigM clock clock = port(dsp, \CLK, SigBit()); endcode +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; @@ -81,6 +136,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock } endcode +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used @@ -103,6 +160,7 @@ match preAdd endmatch code sigA sigD + // TODO: Check if this is necessary? if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); @@ -111,6 +169,9 @@ code sigA sigD } endcode +// (4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) @@ -173,6 +234,8 @@ ffA1_end: ; } endcode +// (5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; @@ -222,6 +285,7 @@ ffB1_end: ; } endcode +// (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; @@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock } endcode +// (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; @@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock sigP = sigM; endcode +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() @@ -291,6 +361,7 @@ code sigC sigP } endcode +// (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; @@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock } endcode +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ match postAddMux if postAdd if ffP @@ -333,6 +417,11 @@ code sigC sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +// the 'P' output where it is compared to a constant that is a power-of-2: +// e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a31dc80bf..a20d3cdce 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,3 +1,25 @@ +// This file describes the second of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// Notes: +// - Separating out CREG packing is necessary since there is no guarantee +// that the cell ordering corresponds to the "expected" case (i.e. the order +// in which they appear in the source) thus the possiblity existed that a +// register got packed as a CREG into a downstream DSP that should have +// otherwise been a PREG of an upstream DSP that had not been visited yet +// - The reason this is separated out from the xilinx_dsp.pmg file is +// for efficiency --- each *.pmg file creates a class of the same basename, +// which when constructed, creates a custom database tailored to the +// pattern(s) contained within. Since the pattern in this file must be +// executed after the pattern contained in xilinx_dsp.pmg, it is necessary +// to reconstruct this database. Separating the two patterns into +// independent files causes two smaller, more specific, databases. + pattern xilinx_dsp_packC udata > unextend @@ -15,13 +37,15 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port match dsp select dsp->type.in(\DSP48E1) select param(dsp, \CREG, 1).as_int() == 0 select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock +code sigC sigP unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -47,7 +71,14 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock } else sigP = P; +endcode +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + // TODO: Any downside to allowing this? + // If this DSP implements an accumulator, do not attempt to match if (sigC == sigP) reject; -- cgit v1.2.3 From 983068103e24c33a1b70eb90dd72fdfaf292e1bd Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:43:19 -0700 Subject: Consistency --- passes/pmgen/xilinx_dsp_CREG.pmg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a20d3cdce..38a5a8d24 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -45,7 +45,7 @@ match dsp select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code sigC sigP +code sigC sigP clock unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -71,6 +71,8 @@ code sigC sigP } else sigP = P; + + clock = port(dsp, \CLK, SigBit()); endcode // (2) Match the driver of the 'C' input to a possible $dff cell (CREG) @@ -82,8 +84,6 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock if (sigC == sigP) reject; - clock = port(dsp, \CLK, SigBit()); - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 7de9c33931020068b285e262bbf239385fcb5c2d Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:43:56 -0700 Subject: Fix TODOs --- passes/pmgen/xilinx_dsp.pmg | 15 --------------- passes/pmgen/xilinx_dsp_CREG.pmg | 5 ----- 2 files changed, 20 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index bcf966a8a..6b6151564 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -103,11 +103,6 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; - // TODO: Check if necessary - // This sigM could have no users if downstream $add - // is narrower than $mul result, for example - if (sigM.empty()) - reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -159,16 +154,6 @@ match preAdd optional endmatch -code sigA sigD - // TODO: Check if this is necessary? - if (preAdd) { - sigA = port(preAdd, \A); - sigD = port(preAdd, \B); - if (GetSize(sigA) < GetSize(sigD)) - std::swap(sigA, sigD); - } -endcode - // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 38a5a8d24..5697ee737 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,11 +79,6 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock - // TODO: Any downside to allowing this? - // If this DSP implements an accumulator, do not attempt to match - if (sigC == sigP) - reject; - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 6d689726193db4d46f7618ff00707e4f30366ad5 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:31:44 -0700 Subject: More comments, cleanup --- passes/pmgen/xilinx_dsp.pmg | 106 +++++++++++++++++++++++++++------------ passes/pmgen/xilinx_dsp_CREG.pmg | 43 ++++++++++++---- 2 files changed, 108 insertions(+), 41 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 6b6151564..3523db3a4 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -425,22 +425,42 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -453,14 +473,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -472,9 +490,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -506,6 +526,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) @@ -530,16 +554,32 @@ endcode // ####################### +// Subpattern for matching against output registers, based on knowledge of the +// 'D' input. +// At a high level: +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) subpattern out_dffe arg argD argQ clock code dff = nullptr; for (auto c : argD.chunks()) + // Abandon matches when 'D' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; endcode +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs match ffcemux select ffcemux->type.in($mux) // ffcemux output must have two users: ffcemux and ff.D @@ -578,6 +618,10 @@ code argD argQ } endcode +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero match ffrstmux select ffrstmux->type.in($mux) // ffrstmux output must have two users: ffrstmux and ff.D @@ -616,6 +660,8 @@ code argD argQ } endcode +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -632,32 +678,30 @@ match ff // Check that FF.Q is connected to CE-mux filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ - if (ff) { - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - - SigSpec D = port(ff, \D); - SigSpec Q = port(ff, \Q); - if (!ffcemux) { - argQ = argD; - argQ.replace(D, Q); - } - - for (auto c : argQ.chunks()) { - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; - } + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffcemux) { + argQ = argD; + argQ.replace(D, Q); + } - dff = ff; - dffQ = argQ; - dffclock = port(ff, \CLK); + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } - // No enable/reset mux possible without flop - else if (dffcemux || dffrstmux) - reject; + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 5697ee737..3d911b478 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -77,7 +77,7 @@ endcode // (2) Match the driver of the 'C' input to a possible $dff cell (CREG) // (attached to at most two $mux cells that implement clock-enable or -// reset functionality, using a subpattern discussed below) +// reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock argQ = sigC; subpattern(in_dffe); @@ -103,22 +103,41 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -131,14 +150,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -150,9 +167,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -184,6 +203,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) -- cgit v1.2.3 From 52583ecff82eb9dc78e10b7bfd33c1be3d4dcc67 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:33:27 -0700 Subject: Revert "Fix TODOs" This reverts commit 8674a6c68d563908014d16671567459499c6dc99. --- passes/pmgen/xilinx_dsp.pmg | 15 +++++++++++++++ passes/pmgen/xilinx_dsp_CREG.pmg | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 3523db3a4..8a2c2caf5 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -103,6 +103,11 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; + // TODO: Check if necessary + // This sigM could have no users if downstream $add + // is narrower than $mul result, for example + if (sigM.empty()) + reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -154,6 +159,16 @@ match preAdd optional endmatch +code sigA sigD + // TODO: Check if this is necessary? + if (preAdd) { + sigA = port(preAdd, \A); + sigD = port(preAdd, \B); + if (GetSize(sigA) < GetSize(sigD)) + std::swap(sigA, sigD); + } +endcode + // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3d911b478..b87a686a1 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,6 +79,11 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + // TODO: Any downside to allowing this? + // If this DSP implements an accumulator, do not attempt to match + if (sigC == sigP) + reject; + argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 77d7a5c14a6cf7c16b69338ed91d1b9166dba065 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:38:09 -0700 Subject: Retry on fixing TODOs --- passes/pmgen/xilinx_dsp.pmg | 9 +-------- passes/pmgen/xilinx_dsp_CREG.pmg | 5 ----- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 8a2c2caf5..dbc3f7455 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -100,14 +100,10 @@ code sigA sigB sigC sigD sigM clock sigM.append(P[i]); } log_assert(nusers(P.extract_end(i)) <= 1); + log_assert(!sigM.empty()); } else sigM = P; - // TODO: Check if necessary - // This sigM could have no users if downstream $add - // is narrower than $mul result, for example - if (sigM.empty()) - reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -160,12 +156,9 @@ match preAdd endmatch code sigA sigD - // TODO: Check if this is necessary? if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); - if (GetSize(sigA) < GetSize(sigD)) - std::swap(sigA, sigD); } endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index b87a686a1..3d911b478 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,11 +79,6 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock - // TODO: Any downside to allowing this? - // If this DSP implements an accumulator, do not attempt to match - if (sigC == sigP) - reject; - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 8027ebf05b7538e501b4903cab9c2ce6a23610ff Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 21:42:46 -0700 Subject: Restore optimisation for sigM.empty() --- passes/pmgen/xilinx_dsp.pmg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index dbc3f7455..77d4850d4 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -100,7 +100,10 @@ code sigA sigB sigC sigD sigM clock sigM.append(P[i]); } log_assert(nusers(P.extract_end(i)) <= 1); - log_assert(!sigM.empty()); + // This sigM could have no users if downstream sinks (e.g. $add) is + // narrower than $mul result, for example + if (sigM.empty()) + reject; } else sigM = P; -- cgit v1.2.3 From 14e4aeece6adc0808ab1876f01752acb0833185a Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 21:45:31 -0700 Subject: Fix comment --- passes/pmgen/xilinx_dsp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 489887207..886e01c0f 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -614,7 +614,7 @@ struct XilinxDspPass : public Pass { xilinx_simd_pack(module, module->selected_cells()); // Match for all features ([ABDMP][12]?REG, pre-adder, - // (post-adder, pattern detector, etc.) except for CREG + // post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); -- cgit v1.2.3 From 12fd2ec4f05ce5efda2a2d2e4d37aef013f2baf9 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 22:24:15 -0700 Subject: Improve comments for xilinx_dsp_CREG --- passes/pmgen/xilinx_dsp_CREG.pmg | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3d911b478..3f8486406 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -7,11 +7,12 @@ // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) // Notes: -// - Separating out CREG packing is necessary since there is no guarantee -// that the cell ordering corresponds to the "expected" case (i.e. the order -// in which they appear in the source) thus the possiblity existed that a -// register got packed as a CREG into a downstream DSP that should have -// otherwise been a PREG of an upstream DSP that had not been visited yet +// - Running CREG packing after xilinx_dsp_pack is necessary since there is no +// guarantee that the cell ordering corresponds to the "expected" case (i.e. +// the order in which they appear in the source) thus the possiblity existed +// that a register got packed as a CREG into a downstream DSP that should +// have otherwise been a PREG of an upstream DSP that had not been visited +// yet // - The reason this is separated out from the xilinx_dsp.pmg file is // for efficiency --- each *.pmg file creates a class of the same basename, // which when constructed, creates a custom database tailored to the @@ -28,7 +29,7 @@ state sigC sigP state ffCcepol ffCrstpol state ffC ffCcemux ffCrstmux -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset -- cgit v1.2.3 From 792cd31052d32d661a995df0c46f5cb9f27b566b Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 22:25:30 -0700 Subject: Add comments for xilinx_dsp_cascade --- passes/pmgen/xilinx_dsp_cascade.pmg | 112 ++++++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 12 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 6f4ac5849..42d1aee6c 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -1,3 +1,46 @@ +// This file describes the third of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP +// (4) Same as (3) but for BCOUT -> BCIN cascade +// (5) Recursively go to (2.1) until no more matches possible, keeping track +// of the longest possible chain found +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate +// Notes: +// - Currently, [AB]COUT -> [AB]COUT cascades (3 or 4) are only considered +// if a PCOUT -> PCIN cascade is (2.1 or 2.2) first identified; this need +// not be the case --- [AB] cascades can exist independently of a P cascade +// (though all three cascades must come from the same DSP). This situation +// is not handled currently. +// - In addition, [AB]COUT -> [AB]COUT cascades (3 or 4) are currently +// conservative in that they examine the situation where (a) the previous +// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no +// registers enabled, and (c) that there exists only one additional register +// between the upstream and downstream DSPs. This can certainly be relaxed +// to identify situations ranging from (i) neither DSP uses any registers, +// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and +// there exists a further 2 registers between them. This remains a TODO +// item. + pattern xilinx_dsp_cascade udata > unextend @@ -6,7 +49,7 @@ state next state clock state AREG BREG -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset @@ -19,12 +62,19 @@ code #define MAX_DSP_CASCADE 20 endcode +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port match first select first->type.in(\DSP48E1) select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") select nusers(port(first, \PCOUT, SigSpec())) <= 1 endmatch +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate code longest_chain.clear(); chain.emplace_back(first, -1, -1, -1); @@ -106,6 +156,10 @@ subpattern tail arg first arg next +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused match nextP select nextP->type.in(\DSP48E1) select !param(nextP, \CREG, State::S1).as_bool() @@ -116,6 +170,8 @@ match nextP semioptional endmatch +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits match nextP_shift17 if !nextP select nextP_shift17->type.in(\DSP48E1) @@ -145,6 +201,14 @@ code next } endcode +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP code argQ clock AREG AREG = -1; if (next) { @@ -152,7 +216,6 @@ code argQ clock AREG if (param(prev, \AREG, 2).as_int() > 0 && param(next, \AREG, 2).as_int() > 0 && param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && - port(next, \ACIN, SigSpec()).is_fully_zero() && nusers(port(prev, \ACOUT, SigSpec())) <= 1) { argQ = unextend(port(next, \A)); clock = port(prev, \CLK); @@ -174,6 +237,7 @@ reject_AREG: ; } endcode +// (4) Same as (3) but for BCOUT -> BCIN cascade code argQ clock BREG BREG = -1; if (next) { @@ -203,13 +267,14 @@ reject_BREG: ; } endcode +// (5) Recursively go to (2.1) until no more matches possible, recording the +// longest possible chain code if (next) { chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG); SigSpec sigC = unextend(port(next, \C)); - // TODO: Cannot use 'reject' since semioptional if (nextP_shift17) { if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) && port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) @@ -232,22 +297,41 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -260,14 +344,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -279,9 +361,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -313,6 +397,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) -- cgit v1.2.3 From ebb059896a55efacf1d90f78dbd25faff30969e2 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:53:01 -0700 Subject: Add note on pattern detector --- passes/pmgen/xilinx_dsp.pmg | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 77d4850d4..09d94ff4b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -44,9 +44,13 @@ // DSP48E1 cells inferred from multiply operations by Yosys, as well as for // user instantiations that may already contain the cells being packed... // (though the latter is currently untested) -// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each -// *REG match, it has been factored out into two subpatterns: in_dffe -// out_dffe located at the bottom of this file +// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used +// for each *REG match, it has been factored out into two subpatterns: +// in_dffe and out_dffe located at the bottom of this file. +// - Matching for pattern detector features is currently incomplete. For +// example, matching for underflow as well as overflow detection is +// possible, as would auto-reset, enabling saturated arithmetic, detecting +// custom patterns, etc. pattern xilinx_dsp_pack -- cgit v1.2.3 From 991c2ca95bfac2bedd9fd622dbef15611021a8be Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:56:37 -0700 Subject: Add comment on why we have to match for clock-enable/reset muxes --- passes/pmgen/xilinx_dsp.pmg | 5 ++++- passes/pmgen/xilinx_dsp_CREG.pmg | 4 +++- passes/pmgen/xilinx_dsp_cascade.pmg | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 09d94ff4b..604aa222b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -441,7 +441,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3f8486406..2408d483a 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -105,7 +105,9 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, this task would be handled by other Yosys passes +// such as dff2dffe, but since DSP inference happens much before this, these +// patterns have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 42d1aee6c..7a32df2b7 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -298,7 +298,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument -- cgit v1.2.3 From f90a4b1e24e36943a343bd36315b6029dd6cd044 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:57:37 -0700 Subject: Missed this --- passes/pmgen/xilinx_dsp_CREG.pmg | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 2408d483a..a57043009 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -105,9 +105,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. Typically, this task would be handled by other Yosys passes -// such as dff2dffe, but since DSP inference happens much before this, these -// patterns have to be manually identified. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument -- cgit v1.2.3 From 10d0bad67e91c45813a1185753fa4dcbcc2c86d8 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Sat, 5 Oct 2019 18:13:04 +0200 Subject: Update README.md --- passes/pmgen/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/pmgen/README.md b/passes/pmgen/README.md index 2f5b8d0b2..39560839f 100644 --- a/passes/pmgen/README.md +++ b/passes/pmgen/README.md @@ -190,7 +190,7 @@ create matches for different sections of a cell. For example: select pmux->type == $pmux slice idx GetSize(port(pmux, \S)) index port(pmux, \S)[idx] === port(eq, \Y) - set pmux_slice idx + set pmux_slice idx endmatch The first argument to `slice` is the local variable name used to identify the -- cgit v1.2.3 From 5c68da4150f8e5367138f2c7187f707b20cc19db Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 09:27:12 -0700 Subject: Missing 'accept' at end of ice40_wrapcarry, spotted by @cliffordwolf --- passes/pmgen/ice40_wrapcarry.pmg | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'passes') diff --git a/passes/pmgen/ice40_wrapcarry.pmg b/passes/pmgen/ice40_wrapcarry.pmg index 9e64c7467..bb59edb0c 100644 --- a/passes/pmgen/ice40_wrapcarry.pmg +++ b/passes/pmgen/ice40_wrapcarry.pmg @@ -9,3 +9,7 @@ match lut index port(lut, \I1) === port(carry, \I0) index port(lut, \I2) === port(carry, \I1) endmatch + +code + accept; +endcode -- cgit v1.2.3 From ea54b5ea61ee242e1dfa7f257a10095f267b8171 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Tue, 8 Oct 2019 12:41:24 -0700 Subject: Revert "Be mindful that sigmap(wire) could have dupes when checking \init" This reverts commit f46ac1df9f8847dac9d9851f2f948d93a1064ff1. --- passes/sat/sat.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'passes') diff --git a/passes/sat/sat.cc b/passes/sat/sat.cc index 93a4f225e..430bba1e8 100644 --- a/passes/sat/sat.cc +++ b/passes/sat/sat.cc @@ -265,18 +265,15 @@ struct SatHelper RTLIL::SigSpec rhs = it.second->attributes.at("\\init"); log_assert(lhs.size() == rhs.size()); - dict seen_init; RTLIL::SigSpec removed_bits; for (int i = 0; i < lhs.size(); i++) { RTLIL::SigSpec bit = lhs.extract(i, 1); - if (rhs[i] == State::Sx || !satgen.initial_state.check_all(bit) || seen_init.at(bit, rhs[i]) != rhs[i]) { + if (rhs[i] == State::Sx || !satgen.initial_state.check_all(bit)) { removed_bits.append(bit); lhs.remove(i, 1); rhs.remove(i, 1); i--; } - else - seen_init[bit] = rhs[i]; } if (removed_bits.size()) -- cgit v1.2.3 From af61d924419844b90bf6d54453489b3e41b7e353 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Wed, 16 Oct 2019 10:43:47 +0200 Subject: Disable left-over log_debug in peepopt_dffmux.pmg Signed-off-by: Clifford Wolf --- passes/pmgen/peepopt_dffmux.pmg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'passes') diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index bfd155c58..15c8dc22f 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -76,7 +76,7 @@ code int i = width-1; while (i > 1) { - log_dump(i, D[i], D[i-1], rst[i], rst[i-1], init[i], init[i-1]); + // log_dump(i, D[i], D[i-1], rst[i], rst[i-1], init[i], init[i-1]); if (D[i] != D[i-1]) break; if (!cmpx(rst[i], rst[i-1])) -- cgit v1.2.3 From bb0851bfc52455f2d93ee878f1876a691564b4ed Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Wed, 16 Oct 2019 11:40:01 +0200 Subject: Move GENERATE_PATTERN macro to separate utility header Signed-off-by: Clifford Wolf --- passes/pmgen/generate.h | 140 +++++++++++++++++++++++++++++++++++++++++++++ passes/pmgen/pmgen.py | 16 +++++- passes/pmgen/test_pmgen.cc | 129 +---------------------------------------- 3 files changed, 157 insertions(+), 128 deletions(-) create mode 100644 passes/pmgen/generate.h (limited to 'passes') diff --git a/passes/pmgen/generate.h b/passes/pmgen/generate.h new file mode 100644 index 000000000..354583de5 --- /dev/null +++ b/passes/pmgen/generate.h @@ -0,0 +1,140 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef PMGEN_GENERATE +#define PMGEN_GENERATE + +#define GENERATE_PATTERN(pmclass, pattern) \ + generate_pattern([](pmclass &pm, std::function f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) + +void pmtest_addports(Module *module) +{ + pool driven_bits, used_bits; + SigMap sigmap(module); + int icnt = 0, ocnt = 0; + + for (auto cell : module->cells()) + for (auto conn : cell->connections()) + { + if (cell->input(conn.first)) + for (auto bit : sigmap(conn.second)) + used_bits.insert(bit); + if (cell->output(conn.first)) + for (auto bit : sigmap(conn.second)) + driven_bits.insert(bit); + } + + for (auto wire : vector(module->wires())) + { + SigSpec ibits, obits; + for (auto bit : sigmap(wire)) { + if (!used_bits.count(bit)) + obits.append(bit); + if (!driven_bits.count(bit)) + ibits.append(bit); + } + if (!ibits.empty()) { + Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); + w->port_input = true; + module->connect(ibits, w); + } + if (!obits.empty()) { + Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); + w->port_output = true; + module->connect(w, obits); + } + } + + module->fixup_ports(); +} + +template +void generate_pattern(std::function)> run, const char *pmclass, const char *pattern, Design *design) +{ + log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); + + int modcnt = 0; + int maxmodcnt = 100; + int maxsubcnt = 4; + int timeout = 0; + vector mods; + + while (modcnt < maxmodcnt) + { + int submodcnt = 0, itercnt = 0, cellcnt = 0; + Module *mod = design->addModule(NEW_ID); + + while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) + { + if (timeout++ > 10000) + log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); + + pm matcher(mod, mod->cells()); + + matcher.rng(1); + matcher.rngseed += modcnt; + matcher.rng(1); + matcher.rngseed += submodcnt; + matcher.rng(1); + matcher.rngseed += itercnt; + matcher.rng(1); + matcher.rngseed += cellcnt; + matcher.rng(1); + + if (GetSize(mod->cells()) != cellcnt) + { + bool found_match = false; + run(matcher, [&](){ found_match = true; }); + cellcnt = GetSize(mod->cells()); + + if (found_match) { + Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", + pmclass, pattern, modcnt++)); + log("Creating module %s with %d cells.\n", log_id(m), cellcnt); + mod->cloneInto(m); + pmtest_addports(m); + mods.push_back(m); + submodcnt++; + timeout = 0; + } + } + + matcher.generate_mode = true; + run(matcher, [](){}); + } + + if (submodcnt && maxsubcnt < (1 << 16)) + maxsubcnt *= 2; + + design->remove(mod); + } + + Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); + log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); + for (auto mod : mods) { + Cell *c = m->addCell(mod->name, mod->name); + for (auto port : mod->ports) { + Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); + c->setPort(port, w); + } + } + pmtest_addports(m); +} + +#endif diff --git a/passes/pmgen/pmgen.py b/passes/pmgen/pmgen.py index 39a09991d..df0ffaff2 100644 --- a/passes/pmgen/pmgen.py +++ b/passes/pmgen/pmgen.py @@ -362,6 +362,7 @@ with open(outfile, "w") as f: print(" Module *module;", file=f) print(" SigMap sigmap;", file=f) print(" std::function on_accept;", file=f) + print(" bool setup_done;", file=f) print(" bool generate_mode;", file=f) print(" int accept_cnt;", file=f) print("", file=f) @@ -477,7 +478,17 @@ with open(outfile, "w") as f: print("", file=f) print(" {}_pm(Module *module, const vector &cells) :".format(prefix), file=f) - print(" module(module), sigmap(module), generate_mode(false), rngseed(12345678) {", file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" setup(cells);", file=f) + print(" }", file=f) + print("", file=f) + + print(" {}_pm(Module *module) :".format(prefix), file=f) + print(" module(module), sigmap(module), setup_done(false), generate_mode(false), rngseed(12345678) {", file=f) + print(" }", file=f) + print("", file=f) + + print(" void setup(const vector &cells) {", file=f) for current_pattern in sorted(patterns.keys()): for s, t in sorted(udata_types[current_pattern].items()): if t.endswith("*"): @@ -485,6 +496,8 @@ with open(outfile, "w") as f: else: print(" ud_{}.{} = {}();".format(current_pattern, s, t), file=f) current_pattern = None + print(" log_assert(!setup_done);", file=f) + print(" setup_done = true;", file=f) print(" for (auto port : module->ports)", file=f) print(" add_siguser(module->wire(port), nullptr);", file=f) print(" for (auto cell : module->cells())", file=f) @@ -539,6 +552,7 @@ with open(outfile, "w") as f: for current_pattern in sorted(patterns.keys()): print(" int run_{}(std::function on_accept_f) {{".format(current_pattern), file=f) + print(" log_assert(setup_done);", file=f) print(" accept_cnt = 0;", file=f) print(" on_accept = on_accept_f;", file=f) print(" rollback = 0;", file=f) diff --git a/passes/pmgen/test_pmgen.cc b/passes/pmgen/test_pmgen.cc index 4f3eec935..72dc18dcc 100644 --- a/passes/pmgen/test_pmgen.cc +++ b/passes/pmgen/test_pmgen.cc @@ -23,13 +23,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -// for peepopt_pm -bool did_something; - #include "passes/pmgen/test_pmgen_pm.h" #include "passes/pmgen/ice40_dsp_pm.h" #include "passes/pmgen/xilinx_srl_pm.h" -#include "passes/pmgen/peepopt_pm.h" + +#include "generate.h" void reduce_chain(test_pmgen_pm &pm) { @@ -118,123 +116,6 @@ void opt_eqpmux(test_pmgen_pm &pm) log(" -> %s (%s)\n", log_id(c), log_id(c->type)); } -#define GENERATE_PATTERN(pmclass, pattern) \ - generate_pattern([](pmclass &pm, std::function f){ return pm.run_ ## pattern(f); }, #pmclass, #pattern, design) - -void pmtest_addports(Module *module) -{ - pool driven_bits, used_bits; - SigMap sigmap(module); - int icnt = 0, ocnt = 0; - - for (auto cell : module->cells()) - for (auto conn : cell->connections()) - { - if (cell->input(conn.first)) - for (auto bit : sigmap(conn.second)) - used_bits.insert(bit); - if (cell->output(conn.first)) - for (auto bit : sigmap(conn.second)) - driven_bits.insert(bit); - } - - for (auto wire : vector(module->wires())) - { - SigSpec ibits, obits; - for (auto bit : sigmap(wire)) { - if (!used_bits.count(bit)) - obits.append(bit); - if (!driven_bits.count(bit)) - ibits.append(bit); - } - if (!ibits.empty()) { - Wire *w = module->addWire(stringf("\\i%d", icnt++), GetSize(ibits)); - w->port_input = true; - module->connect(ibits, w); - } - if (!obits.empty()) { - Wire *w = module->addWire(stringf("\\o%d", ocnt++), GetSize(obits)); - w->port_output = true; - module->connect(w, obits); - } - } - - module->fixup_ports(); -} - -template -void generate_pattern(std::function)> run, const char *pmclass, const char *pattern, Design *design) -{ - log("Generating \"%s\" patterns for pattern matcher \"%s\".\n", pattern, pmclass); - - int modcnt = 0; - int maxmodcnt = 100; - int maxsubcnt = 4; - int timeout = 0; - vector mods; - - while (modcnt < maxmodcnt) - { - int submodcnt = 0, itercnt = 0, cellcnt = 0; - Module *mod = design->addModule(NEW_ID); - - while (modcnt < maxmodcnt && submodcnt < maxsubcnt && itercnt++ < 1000) - { - if (timeout++ > 10000) - log_error("pmgen generator is stuck: 10000 iterations with no matching module generated.\n"); - - pm matcher(mod, mod->cells()); - - matcher.rng(1); - matcher.rngseed += modcnt; - matcher.rng(1); - matcher.rngseed += submodcnt; - matcher.rng(1); - matcher.rngseed += itercnt; - matcher.rng(1); - matcher.rngseed += cellcnt; - matcher.rng(1); - - if (GetSize(mod->cells()) != cellcnt) - { - bool found_match = false; - run(matcher, [&](){ found_match = true; }); - cellcnt = GetSize(mod->cells()); - - if (found_match) { - Module *m = design->addModule(stringf("\\pmtest_%s_%s_%05d", - pmclass, pattern, modcnt++)); - log("Creating module %s with %d cells.\n", log_id(m), cellcnt); - mod->cloneInto(m); - pmtest_addports(m); - mods.push_back(m); - submodcnt++; - timeout = 0; - } - } - - matcher.generate_mode = true; - run(matcher, [](){}); - } - - if (submodcnt && maxsubcnt < (1 << 16)) - maxsubcnt *= 2; - - design->remove(mod); - } - - Module *m = design->addModule(stringf("\\pmtest_%s_%s", pmclass, pattern)); - log("Creating module %s with %d cells.\n", log_id(m), GetSize(mods)); - for (auto mod : mods) { - Cell *c = m->addCell(mod->name, mod->name); - for (auto port : mod->ports) { - Wire *w = m->addWire(NEW_ID, GetSize(mod->wire(port))); - c->setPort(port, w); - } - } - pmtest_addports(m); -} - struct TestPmgenPass : public Pass { TestPmgenPass() : Pass("test_pmgen", "test pass for pmgen") { } void help() YS_OVERRIDE @@ -355,12 +236,6 @@ struct TestPmgenPass : public Pass { if (pattern == "xilinx_srl.variable") return GENERATE_PATTERN(xilinx_srl_pm, variable); - if (pattern == "peepopt-muldiv") - return GENERATE_PATTERN(peepopt_pm, muldiv); - - if (pattern == "peepopt-shiftmul") - return GENERATE_PATTERN(peepopt_pm, shiftmul); - log_cmd_error("Unknown pattern: %s\n", pattern.c_str()); } -- cgit v1.2.3 From b8774ae849cb6aa54a852a245f8634afaac1eb76 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Wed, 16 Oct 2019 11:40:32 +0200 Subject: Fix dffmux peepopt init handling Signed-off-by: Clifford Wolf --- passes/pmgen/peepopt.cc | 76 +++++++++++++++++++++++++++++++++++++---- passes/pmgen/peepopt_dffmux.pmg | 64 +++++++++++++++++++++++----------- 2 files changed, 113 insertions(+), 27 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index 72b02127a..2230145df 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -24,8 +24,11 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN bool did_something; +dict initbits; +pool rminitbits; #include "passes/pmgen/peepopt_pm.h" +#include "generate.h" struct PeepoptPass : public Pass { PeepoptPass() : Pass("peepopt", "collection of peephole optimizers") { } @@ -40,27 +43,86 @@ struct PeepoptPass : public Pass { } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE { + std::string genmode; + log_header(design, "Executing PEEPOPT pass (run peephole optimizers).\n"); size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-singleton") { - // singleton_mode = true; - // continue; - // } + if (args[argidx] == "-generate" && argidx+1 < args.size()) { + genmode = args[++argidx]; + continue; + } break; } extra_args(args, argidx, design); - for (auto module : design->selected_modules()) { + if (!genmode.empty()) + { + initbits.clear(); + rminitbits.clear(); + + if (genmode == "shiftmul") + GENERATE_PATTERN(peepopt_pm, shiftmul); + else if (genmode == "muldiv") + GENERATE_PATTERN(peepopt_pm, muldiv); + else if (genmode == "dffmux") + GENERATE_PATTERN(peepopt_pm, dffmux); + else + log_abort(); + return; + } + + for (auto module : design->selected_modules()) + { did_something = true; - while (did_something) { + + while (did_something) + { did_something = false; - peepopt_pm pm(module, module->selected_cells()); + initbits.clear(); + rminitbits.clear(); + + peepopt_pm pm(module); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (sig[i].wire == nullptr) + continue; + if (val[i] != State::S0 && val[i] != State::S1) + continue; + initbits[sig[i]] = val[i]; + } + } + } + + pm.setup(module->selected_cells()); + pm.run_shiftmul(); pm.run_muldiv(); pm.run_dffmux(); + + for (auto w : module->wires()) { + auto it = w->attributes.find(ID(init)); + if (it != w->attributes.end()) { + SigSpec sig = pm.sigmap(w); + Const &val = it->second; + int len = std::min(GetSize(sig), GetSize(val)); + for (int i = 0; i < len; i++) { + if (rminitbits.count(sig[i])) + val[i] = State::Sx; + } + } + } + + initbits.clear(); + rminitbits.clear(); } } } diff --git a/passes/pmgen/peepopt_dffmux.pmg b/passes/pmgen/peepopt_dffmux.pmg index 15c8dc22f..0069b0570 100644 --- a/passes/pmgen/peepopt_dffmux.pmg +++ b/passes/pmgen/peepopt_dffmux.pmg @@ -60,12 +60,13 @@ code SigSpec Q = port(dff, \Q); int width = GetSize(D); - SigSpec &dffD = dff->connections_.at(\D); - SigSpec &dffQ = dff->connections_.at(\Q); - Const init; - for (const auto &b : Q) { - auto it = b.wire->attributes.find(\init); - init.bits.push_back(it == b.wire->attributes.end() ? State::Sx : it->second[b.offset]); + SigSpec dffD = dff->getPort(\D); + SigSpec dffQ = dff->getPort(\Q); + + Const initval; + for (auto b : Q) { + auto it = initbits.find(b); + initval.bits.push_back(it == initbits.end() ? State::Sx : it->second); } auto cmpx = [=](State lhs, State rhs) { @@ -76,56 +77,68 @@ code int i = width-1; while (i > 1) { - // log_dump(i, D[i], D[i-1], rst[i], rst[i-1], init[i], init[i-1]); if (D[i] != D[i-1]) break; if (!cmpx(rst[i], rst[i-1])) break; - if (!cmpx(init[i], init[i-1])) + if (!cmpx(initval[i], initval[i-1])) break; - if (!cmpx(rst[i], init[i])) + if (!cmpx(rst[i], initval[i])) break; + rminitbits.insert(Q[i]); module->connect(Q[i], Q[i-1]); i--; } if (i < width-1) { did_something = true; if (cemux) { - SigSpec &ceA = cemux->connections_.at(\A); - SigSpec &ceB = cemux->connections_.at(\B); - SigSpec &ceY = cemux->connections_.at(\Y); + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); ceA.remove(i, width-1-i); ceB.remove(i, width-1-i); ceY.remove(i, width-1-i); + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); cemux->fixup_parameters(); + blacklist(cemux); } if (rstmux) { - SigSpec &rstA = rstmux->connections_.at(\A); - SigSpec &rstB = rstmux->connections_.at(\B); - SigSpec &rstY = rstmux->connections_.at(\Y); + SigSpec rstA = rstmux->getPort(\A); + SigSpec rstB = rstmux->getPort(\B); + SigSpec rstY = rstmux->getPort(\Y); rstA.remove(i, width-1-i); rstB.remove(i, width-1-i); rstY.remove(i, width-1-i); + rstmux->setPort(\A, rstA); + rstmux->setPort(\B, rstB); + rstmux->setPort(\Y, rstY); rstmux->fixup_parameters(); + blacklist(rstmux); } dffD.remove(i, width-1-i); dffQ.remove(i, width-1-i); + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); + blacklist(dff); log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed top %d bits.\n", log_id(module), log_id(dff), log_id(cemux, "n/a"), log_id(rstmux, "n/a"), width-1-i); width = i+1; } if (cemux) { - SigSpec &ceA = cemux->connections_.at(\A); - SigSpec &ceB = cemux->connections_.at(\B); - SigSpec &ceY = cemux->connections_.at(\Y); + SigSpec ceA = cemux->getPort(\A); + SigSpec ceB = cemux->getPort(\B); + SigSpec ceY = cemux->getPort(\Y); int count = 0; for (int i = width-1; i >= 0; i--) { if (D[i].wire) continue; - if (cmpx(rst[i], D[i].data) && cmpx(init[i], D[i].data)) { + if (cmpx(rst[i], D[i].data) && cmpx(initval[i], D[i].data)) { count++; + rminitbits.insert(Q[i]); module->connect(Q[i], D[i]); ceA.remove(i); ceB.remove(i); @@ -134,10 +147,21 @@ code dffQ.remove(i); } } - if (count > 0) { + if (count > 0) + { did_something = true; + + cemux->setPort(\A, ceA); + cemux->setPort(\B, ceB); + cemux->setPort(\Y, ceY); cemux->fixup_parameters(); + blacklist(cemux); + + dff->setPort(\D, dffD); + dff->setPort(\Q, dffQ); dff->fixup_parameters(); + blacklist(dff); + log("dffcemux pattern in %s: dff=%s, cemux=%s, rstmux=%s; removed %d constant bits.\n", log_id(module), log_id(dff), log_id(cemux), log_id(rstmux, "n/a"), count); } } -- cgit v1.2.3