From cf82b38478e598c915d14d595b554fc122034850 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:40:34 -0700 Subject: Add comments for xilinx_dsp --- passes/pmgen/xilinx_dsp.cc | 14 ++++-- passes/pmgen/xilinx_dsp.pmg | 93 +++++++++++++++++++++++++++++++++++++++- passes/pmgen/xilinx_dsp_CREG.pmg | 33 +++++++++++++- 3 files changed, 134 insertions(+), 6 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 11c7e5ea8..489887207 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -608,8 +608,13 @@ struct XilinxDspPass : public Pass { extra_args(args, argidx, design); for (auto module : design->selected_modules()) { + // Experimental feature: pack $add/$sub cells with + // (* use_dsp48="simd" *) into DSP48E1's using its + // SIMD feature xilinx_simd_pack(module, module->selected_cells()); + // Match for all features ([ABDMP][12]?REG, pre-adder, + // (post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); @@ -618,14 +623,17 @@ struct XilinxDspPass : public Pass { // is no guarantee that the cell ordering corresponds // to the "expected" case (i.e. the order in which // they appear in the source) thus the possiblity - // existed that a register got packed as CREG into a + // existed that a register got packed as a CREG into a // downstream DSP that should have otherwise been a - // PREG of an upstream DSP that had not been pattern - // matched yet + // PREG of an upstream DSP that had not been visited + // yet { xilinx_dsp_CREG_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_packC(xilinx_dsp_packC); } + // Lastly, identify and utilise PCOUT -> PCIN, + // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade + // chains { xilinx_dsp_cascade_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_cascade(); diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 4e174e753..bcf966a8a 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -1,3 +1,53 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// ( 1) Starting from a DSP48E1 cell +// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If ADREG matched, treat 'A' input as input of ADREG +// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// If A2REG, then match 'A' input for A1REG +// ( 5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached +// to the 'P' output where it is compared to a constant that is a +// power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function +// Notes: +// - The intention of this pattern matcher is for it to be compatible with +// DSP48E1 cells inferred from multiply operations by Yosys, as well as for +// user instantiations that may already contain the cells being packed... +// (though the latter is currently untested) +// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each +// *REG match, it has been factored out into two subpatterns: in_dffe +// out_dffe located at the bottom of this file + pattern xilinx_dsp_pack state clock @@ -5,12 +55,11 @@ state sigA sigB sigC sigD sigM sigP state postAddAB postAddMuxAB state ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol - state ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset @@ -19,6 +68,7 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch @@ -53,6 +103,7 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; + // TODO: Check if necessary // This sigM could have no users if downstream $add // is narrower than $mul result, for example if (sigM.empty()) @@ -61,6 +112,10 @@ code sigA sigB sigC sigD sigM clock clock = port(dsp, \CLK, SigBit()); endcode +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; @@ -81,6 +136,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock } endcode +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used @@ -103,6 +160,7 @@ match preAdd endmatch code sigA sigD + // TODO: Check if this is necessary? if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); @@ -111,6 +169,9 @@ code sigA sigD } endcode +// (4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) @@ -173,6 +234,8 @@ ffA1_end: ; } endcode +// (5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; @@ -222,6 +285,7 @@ ffB1_end: ; } endcode +// (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; @@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock } endcode +// (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; @@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock sigP = sigM; endcode +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() @@ -291,6 +361,7 @@ code sigC sigP } endcode +// (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; @@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock } endcode +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ match postAddMux if postAdd if ffP @@ -333,6 +417,11 @@ code sigC sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +// the 'P' output where it is compared to a constant that is a power-of-2: +// e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a31dc80bf..a20d3cdce 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,3 +1,25 @@ +// This file describes the second of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// Notes: +// - Separating out CREG packing is necessary since there is no guarantee +// that the cell ordering corresponds to the "expected" case (i.e. the order +// in which they appear in the source) thus the possiblity existed that a +// register got packed as a CREG into a downstream DSP that should have +// otherwise been a PREG of an upstream DSP that had not been visited yet +// - The reason this is separated out from the xilinx_dsp.pmg file is +// for efficiency --- each *.pmg file creates a class of the same basename, +// which when constructed, creates a custom database tailored to the +// pattern(s) contained within. Since the pattern in this file must be +// executed after the pattern contained in xilinx_dsp.pmg, it is necessary +// to reconstruct this database. Separating the two patterns into +// independent files causes two smaller, more specific, databases. + pattern xilinx_dsp_packC udata > unextend @@ -15,13 +37,15 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port match dsp select dsp->type.in(\DSP48E1) select param(dsp, \CREG, 1).as_int() == 0 select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock +code sigC sigP unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -47,7 +71,14 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock } else sigP = P; +endcode +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + // TODO: Any downside to allowing this? + // If this DSP implements an accumulator, do not attempt to match if (sigC == sigP) reject; -- cgit v1.2.3 From 983068103e24c33a1b70eb90dd72fdfaf292e1bd Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:43:19 -0700 Subject: Consistency --- passes/pmgen/xilinx_dsp_CREG.pmg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a20d3cdce..38a5a8d24 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -45,7 +45,7 @@ match dsp select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code sigC sigP +code sigC sigP clock unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -71,6 +71,8 @@ code sigC sigP } else sigP = P; + + clock = port(dsp, \CLK, SigBit()); endcode // (2) Match the driver of the 'C' input to a possible $dff cell (CREG) @@ -82,8 +84,6 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock if (sigC == sigP) reject; - clock = port(dsp, \CLK, SigBit()); - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 7de9c33931020068b285e262bbf239385fcb5c2d Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:43:56 -0700 Subject: Fix TODOs --- passes/pmgen/xilinx_dsp.pmg | 15 --------------- passes/pmgen/xilinx_dsp_CREG.pmg | 5 ----- 2 files changed, 20 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index bcf966a8a..6b6151564 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -103,11 +103,6 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; - // TODO: Check if necessary - // This sigM could have no users if downstream $add - // is narrower than $mul result, for example - if (sigM.empty()) - reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -159,16 +154,6 @@ match preAdd optional endmatch -code sigA sigD - // TODO: Check if this is necessary? - if (preAdd) { - sigA = port(preAdd, \A); - sigD = port(preAdd, \B); - if (GetSize(sigA) < GetSize(sigD)) - std::swap(sigA, sigD); - } -endcode - // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 38a5a8d24..5697ee737 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,11 +79,6 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock - // TODO: Any downside to allowing this? - // If this DSP implements an accumulator, do not attempt to match - if (sigC == sigP) - reject; - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 6d689726193db4d46f7618ff00707e4f30366ad5 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:31:44 -0700 Subject: More comments, cleanup --- passes/pmgen/xilinx_dsp.pmg | 106 +++++++++++++++++++++++++++------------ passes/pmgen/xilinx_dsp_CREG.pmg | 43 ++++++++++++---- 2 files changed, 108 insertions(+), 41 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 6b6151564..3523db3a4 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -425,22 +425,42 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -453,14 +473,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -472,9 +490,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -506,6 +526,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) @@ -530,16 +554,32 @@ endcode // ####################### +// Subpattern for matching against output registers, based on knowledge of the +// 'D' input. +// At a high level: +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) subpattern out_dffe arg argD argQ clock code dff = nullptr; for (auto c : argD.chunks()) + // Abandon matches when 'D' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; endcode +// (1) Starting from an optional $mux cell that implements clock enable +// semantics --- one where the given 'D' argument (partially or fully) +// drives one of its two inputs match ffcemux select ffcemux->type.in($mux) // ffcemux output must have two users: ffcemux and ff.D @@ -578,6 +618,10 @@ code argD argQ } endcode +// (2) Starting from, or continuing onto, another optional $mux cell that +// implements synchronous reset semantics --- one where the given 'D' +// argument (or the clock enable $mux output) drives one of its two inputs +// and where the other input is fully zero match ffrstmux select ffrstmux->type.in($mux) // ffrstmux output must have two users: ffrstmux and ff.D @@ -616,6 +660,8 @@ code argD argQ } endcode +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +// output of the previous clock enable or reset $mux cells) match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -632,32 +678,30 @@ match ff // Check that FF.Q is connected to CE-mux filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ - if (ff) { - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - - SigSpec D = port(ff, \D); - SigSpec Q = port(ff, \Q); - if (!ffcemux) { - argQ = argD; - argQ.replace(D, Q); - } - - for (auto c : argQ.chunks()) { - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; - } + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + if (!ffcemux) { + argQ = argD; + argQ.replace(D, Q); + } - dff = ff; - dffQ = argQ; - dffclock = port(ff, \CLK); + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + for (auto c : argQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } - // No enable/reset mux possible without flop - else if (dffcemux || dffrstmux) - reject; + + dff = ff; + dffQ = argQ; + dffclock = port(ff, \CLK); endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 5697ee737..3d911b478 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -77,7 +77,7 @@ endcode // (2) Match the driver of the 'C' input to a possible $dff cell (CREG) // (attached to at most two $mux cells that implement clock-enable or -// reset functionality, using a subpattern discussed below) +// reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock argQ = sigC; subpattern(in_dffe); @@ -103,22 +103,41 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -131,14 +150,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -150,9 +167,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -184,6 +203,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) -- cgit v1.2.3 From 52583ecff82eb9dc78e10b7bfd33c1be3d4dcc67 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:33:27 -0700 Subject: Revert "Fix TODOs" This reverts commit 8674a6c68d563908014d16671567459499c6dc99. --- passes/pmgen/xilinx_dsp.pmg | 15 +++++++++++++++ passes/pmgen/xilinx_dsp_CREG.pmg | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 3523db3a4..8a2c2caf5 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -103,6 +103,11 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; + // TODO: Check if necessary + // This sigM could have no users if downstream $add + // is narrower than $mul result, for example + if (sigM.empty()) + reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -154,6 +159,16 @@ match preAdd optional endmatch +code sigA sigD + // TODO: Check if this is necessary? + if (preAdd) { + sigA = port(preAdd, \A); + sigD = port(preAdd, \B); + if (GetSize(sigA) < GetSize(sigD)) + std::swap(sigA, sigD); + } +endcode + // (4) If pre-adder was present, find match 'A' input for A2REG // If pre-adder was not present, move ADREG to A2REG // Then match 'A' input for A1REG diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3d911b478..b87a686a1 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,6 +79,11 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + // TODO: Any downside to allowing this? + // If this DSP implements an accumulator, do not attempt to match + if (sigC == sigP) + reject; + argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 77d7a5c14a6cf7c16b69338ed91d1b9166dba065 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 13:38:09 -0700 Subject: Retry on fixing TODOs --- passes/pmgen/xilinx_dsp.pmg | 9 +-------- passes/pmgen/xilinx_dsp_CREG.pmg | 5 ----- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 8a2c2caf5..dbc3f7455 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -100,14 +100,10 @@ code sigA sigB sigC sigD sigM clock sigM.append(P[i]); } log_assert(nusers(P.extract_end(i)) <= 1); + log_assert(!sigM.empty()); } else sigM = P; - // TODO: Check if necessary - // This sigM could have no users if downstream $add - // is narrower than $mul result, for example - if (sigM.empty()) - reject; clock = port(dsp, \CLK, SigBit()); endcode @@ -160,12 +156,9 @@ match preAdd endmatch code sigA sigD - // TODO: Check if this is necessary? if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); - if (GetSize(sigA) < GetSize(sigD)) - std::swap(sigA, sigD); } endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index b87a686a1..3d911b478 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -79,11 +79,6 @@ endcode // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using the in_dffe subpattern) code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock - // TODO: Any downside to allowing this? - // If this DSP implements an accumulator, do not attempt to match - if (sigC == sigP) - reject; - argQ = sigC; subpattern(in_dffe); if (dff) { -- cgit v1.2.3 From 8027ebf05b7538e501b4903cab9c2ce6a23610ff Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 21:42:46 -0700 Subject: Restore optimisation for sigM.empty() --- passes/pmgen/xilinx_dsp.pmg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index dbc3f7455..77d4850d4 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -100,7 +100,10 @@ code sigA sigB sigC sigD sigM clock sigM.append(P[i]); } log_assert(nusers(P.extract_end(i)) <= 1); - log_assert(!sigM.empty()); + // This sigM could have no users if downstream sinks (e.g. $add) is + // narrower than $mul result, for example + if (sigM.empty()) + reject; } else sigM = P; -- cgit v1.2.3 From 14e4aeece6adc0808ab1876f01752acb0833185a Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 21:45:31 -0700 Subject: Fix comment --- passes/pmgen/xilinx_dsp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 489887207..886e01c0f 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -614,7 +614,7 @@ struct XilinxDspPass : public Pass { xilinx_simd_pack(module, module->selected_cells()); // Match for all features ([ABDMP][12]?REG, pre-adder, - // (post-adder, pattern detector, etc.) except for CREG + // post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); -- cgit v1.2.3 From 12fd2ec4f05ce5efda2a2d2e4d37aef013f2baf9 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 22:24:15 -0700 Subject: Improve comments for xilinx_dsp_CREG --- passes/pmgen/xilinx_dsp_CREG.pmg | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3d911b478..3f8486406 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -7,11 +7,12 @@ // (attached to at most two $mux cells that implement clock-enable or // reset functionality, using a subpattern discussed below) // Notes: -// - Separating out CREG packing is necessary since there is no guarantee -// that the cell ordering corresponds to the "expected" case (i.e. the order -// in which they appear in the source) thus the possiblity existed that a -// register got packed as a CREG into a downstream DSP that should have -// otherwise been a PREG of an upstream DSP that had not been visited yet +// - Running CREG packing after xilinx_dsp_pack is necessary since there is no +// guarantee that the cell ordering corresponds to the "expected" case (i.e. +// the order in which they appear in the source) thus the possiblity existed +// that a register got packed as a CREG into a downstream DSP that should +// have otherwise been a PREG of an upstream DSP that had not been visited +// yet // - The reason this is separated out from the xilinx_dsp.pmg file is // for efficiency --- each *.pmg file creates a class of the same basename, // which when constructed, creates a custom database tailored to the @@ -28,7 +29,7 @@ state sigC sigP state ffCcepol ffCrstpol state ffC ffCcemux ffCrstmux -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset -- cgit v1.2.3 From 792cd31052d32d661a995df0c46f5cb9f27b566b Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 22:25:30 -0700 Subject: Add comments for xilinx_dsp_cascade --- passes/pmgen/xilinx_dsp_cascade.pmg | 112 ++++++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 6f4ac5849..42d1aee6c 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -1,3 +1,46 @@ +// This file describes the third of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP +// (4) Same as (3) but for BCOUT -> BCIN cascade +// (5) Recursively go to (2.1) until no more matches possible, keeping track +// of the longest possible chain found +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate +// Notes: +// - Currently, [AB]COUT -> [AB]COUT cascades (3 or 4) are only considered +// if a PCOUT -> PCIN cascade is (2.1 or 2.2) first identified; this need +// not be the case --- [AB] cascades can exist independently of a P cascade +// (though all three cascades must come from the same DSP). This situation +// is not handled currently. +// - In addition, [AB]COUT -> [AB]COUT cascades (3 or 4) are currently +// conservative in that they examine the situation where (a) the previous +// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no +// registers enabled, and (c) that there exists only one additional register +// between the upstream and downstream DSPs. This can certainly be relaxed +// to identify situations ranging from (i) neither DSP uses any registers, +// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and +// there exists a further 2 registers between them. This remains a TODO +// item. + pattern xilinx_dsp_cascade udata > unextend @@ -6,7 +49,7 @@ state next state clock state AREG BREG -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset @@ -19,12 +62,19 @@ code #define MAX_DSP_CASCADE 20 endcode +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +// use the 'PCOUT' port match first select first->type.in(\DSP48E1) select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") select nusers(port(first, \PCOUT, SigSpec())) <= 1 endmatch +// (6) The longest chain is then divided into chunks of no more than +// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +// height of a DSP column) with each DSP in each chunk being rewritten +// to use [ABP]COUT -> [ABP]CIN cascading as appropriate code longest_chain.clear(); chain.emplace_back(first, -1, -1, -1); @@ -106,6 +156,10 @@ subpattern tail arg first arg next +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (b) has its Z multiplexer output set to the 'C' port, which is +// driven by the 'P' output of the previous DSP cell, and (c) has its +// 'PCIN' port unused match nextP select nextP->type.in(\DSP48E1) select !param(nextP, \CREG, State::S1).as_bool() @@ -116,6 +170,8 @@ match nextP semioptional endmatch +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +// previous DSP cell right-shifted by 17 bits match nextP_shift17 if !nextP select nextP_shift17->type.in(\DSP48E1) @@ -145,6 +201,14 @@ code next } endcode +// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) +// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +// use its ACOUT port, then examine if an ACOUT -> ACIN cascade +// opportunity exists by matching for a $dff-with-optional-clock-enable- +// or-reset and checking that the 'D' input of this register is the same +// as the 'A' input of the previous DSP code argQ clock AREG AREG = -1; if (next) { @@ -152,7 +216,6 @@ code argQ clock AREG if (param(prev, \AREG, 2).as_int() > 0 && param(next, \AREG, 2).as_int() > 0 && param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && - port(next, \ACIN, SigSpec()).is_fully_zero() && nusers(port(prev, \ACOUT, SigSpec())) <= 1) { argQ = unextend(port(next, \A)); clock = port(prev, \CLK); @@ -174,6 +237,7 @@ reject_AREG: ; } endcode +// (4) Same as (3) but for BCOUT -> BCIN cascade code argQ clock BREG BREG = -1; if (next) { @@ -203,13 +267,14 @@ reject_BREG: ; } endcode +// (5) Recursively go to (2.1) until no more matches possible, recording the +// longest possible chain code if (next) { chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG); SigSpec sigC = unextend(port(next, \C)); - // TODO: Cannot use 'reject' since semioptional if (nextP_shift17) { if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) && port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) @@ -232,22 +297,41 @@ endcode // ####################### +// Subpattern for matching against input registers, based on knowledge of the +// 'Q' input. +// At a high level: +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// one that exclusively drives the 'D' input of the $dff, with one of its +// $mux inputs being fully zero +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff subpattern in_dffe arg argD argQ clock code dff = nullptr; - for (auto c : argQ.chunks()) { + for (const auto &c : argQ.chunks()) { + // Abandon matches when 'Q' is a constant if (!c.wire) reject; + // Abandon matches when 'Q' has the keep attribute set if (c.wire->get_bool_attribute(\keep)) reject; - Const init = c.wire->attributes.at(\init, State::Sx); - if (!init.is_fully_undef() && !init.is_fully_zero()) - reject; + // Abandon matches when 'Q' has a non-zero init attribute set + // (not supported by DSP48E1) + Const init = c.wire->attributes.at(\init, Const()); + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx && b != State::S0) + reject; } endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +// 'Q' argument match ff select ff->type.in($dff) // DSP48E1 does not support clock inversion @@ -260,14 +344,12 @@ match ff filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + filter clock == SigBit() || port(ff, \CLK) == clock + set ffoffset offset endmatch code argQ argD -{ - if (clock != SigBit() && port(ff, \CLK) != clock) - reject; - SigSpec Q = port(ff, \Q); dff = ff; dffclock = port(ff, \CLK); @@ -279,9 +361,11 @@ code argQ argD // has two (ff, ffrstmux) users if (nusers(dffD) > 2) argD = SigSpec(); -} endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +// exclusively drives the 'D' input of the $dff, with one of the $mux +// inputs being fully zero match ffrstmux if !argD.empty() select ffrstmux->type.in($mux) @@ -313,6 +397,10 @@ code argD dffrstmux = nullptr; endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +// exclusively drives the 'D' input of the $dff (or the other input of +// the reset $mux) and where one of this $mux's inputs is connected to +// the 'Q' output of the $dff match ffcemux if !argD.empty() select ffcemux->type.in($mux) -- cgit v1.2.3 From 6c5e1234e19159b7577a5e64a7a463142160f7ff Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 22:30:14 -0700 Subject: Add comment on why partial multipliers are 18x18 --- techlibs/xilinx/synth_xilinx.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc index 41429b338..4fe287744 100644 --- a/techlibs/xilinx/synth_xilinx.cc +++ b/techlibs/xilinx/synth_xilinx.cc @@ -342,10 +342,14 @@ struct SynthXilinxPass : public ScriptPass if (check_label("map_dsp", "(skip if '-nodsp')")) { if (!nodsp || help_mode) { // NB: Xilinx multipliers are signed only - run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " - "-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers - "-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller - "-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18"); + run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 " + "-D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " // Partial multipliers are intentionally + // limited to 18x18 in order to take + // advantage of the (PCOUT << 17) -> PCIN + // dedicated cascade chain capability + "-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers + "-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller + "-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18"); run("select a:mul2dsp"); run("setattr -unset mul2dsp"); run("opt_expr -fine"); -- cgit v1.2.3 From ebb059896a55efacf1d90f78dbd25faff30969e2 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:53:01 -0700 Subject: Add note on pattern detector --- passes/pmgen/xilinx_dsp.pmg | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 77d4850d4..09d94ff4b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -44,9 +44,13 @@ // DSP48E1 cells inferred from multiply operations by Yosys, as well as for // user instantiations that may already contain the cells being packed... // (though the latter is currently untested) -// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each -// *REG match, it has been factored out into two subpatterns: in_dffe -// out_dffe located at the bottom of this file +// - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used +// for each *REG match, it has been factored out into two subpatterns: +// in_dffe and out_dffe located at the bottom of this file. +// - Matching for pattern detector features is currently incomplete. For +// example, matching for underflow as well as overflow detection is +// possible, as would auto-reset, enabling saturated arithmetic, detecting +// custom patterns, etc. pattern xilinx_dsp_pack -- cgit v1.2.3 From 991c2ca95bfac2bedd9fd622dbef15611021a8be Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:56:37 -0700 Subject: Add comment on why we have to match for clock-enable/reset muxes --- passes/pmgen/xilinx_dsp.pmg | 5 ++++- passes/pmgen/xilinx_dsp_CREG.pmg | 4 +++- passes/pmgen/xilinx_dsp_cascade.pmg | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 09d94ff4b..604aa222b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -441,7 +441,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 3f8486406..2408d483a 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -105,7 +105,9 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, this task would be handled by other Yosys passes +// such as dff2dffe, but since DSP inference happens much before this, these +// patterns have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 42d1aee6c..7a32df2b7 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -298,7 +298,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument -- cgit v1.2.3 From f90a4b1e24e36943a343bd36315b6029dd6cd044 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Sat, 5 Oct 2019 08:57:37 -0700 Subject: Missed this --- passes/pmgen/xilinx_dsp_CREG.pmg | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 2408d483a..a57043009 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -105,9 +105,10 @@ endcode // ####################### // Subpattern for matching against input registers, based on knowledge of the -// 'Q' input. Typically, this task would be handled by other Yosys passes -// such as dff2dffe, but since DSP inference happens much before this, these -// patterns have to be manually identified. +// 'Q' input. Typically, identifying registers with clock-enable and reset +// capability would be a task would be handled by other Yosys passes such as +// dff2dffe, but since DSP inference happens much before this, these patterns +// have to be manually identified. // At a high level: // (1) Starting from a $dff cell that (partially or fully) drives the given // 'Q' argument -- cgit v1.2.3