From cf82b38478e598c915d14d595b554fc122034850 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Fri, 4 Oct 2019 12:40:34 -0700 Subject: Add comments for xilinx_dsp --- passes/pmgen/xilinx_dsp.cc | 14 ++++-- passes/pmgen/xilinx_dsp.pmg | 93 +++++++++++++++++++++++++++++++++++++++- passes/pmgen/xilinx_dsp_CREG.pmg | 33 +++++++++++++- 3 files changed, 134 insertions(+), 6 deletions(-) (limited to 'passes') diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 11c7e5ea8..489887207 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -608,8 +608,13 @@ struct XilinxDspPass : public Pass { extra_args(args, argidx, design); for (auto module : design->selected_modules()) { + // Experimental feature: pack $add/$sub cells with + // (* use_dsp48="simd" *) into DSP48E1's using its + // SIMD feature xilinx_simd_pack(module, module->selected_cells()); + // Match for all features ([ABDMP][12]?REG, pre-adder, + // post-adder, pattern detector, etc.) except for CREG { xilinx_dsp_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_pack(xilinx_dsp_pack); @@ -618,14 +623,17 @@ struct XilinxDspPass : public Pass { // is no guarantee that the cell ordering corresponds // to the "expected" case (i.e. 
the order in which // they appear in the source) thus the possiblity - // existed that a register got packed as CREG into a + // existed that a register got packed as a CREG into a // downstream DSP that should have otherwise been a - // PREG of an upstream DSP that had not been pattern - // matched yet + // PREG of an upstream DSP that had not been visited + // yet { xilinx_dsp_CREG_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_packC(xilinx_dsp_packC); } + // Lastly, identify and utilise PCOUT -> PCIN, + // ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade + // chains { xilinx_dsp_cascade_pm pm(module, module->selected_cells()); pm.run_xilinx_dsp_cascade(); diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 4e174e753..bcf966a8a 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -1,3 +1,53 @@ +// This file describes the main pattern matcher setup (of three total) that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// ( 1) Starting from a DSP48E1 cell +// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// If ADREG matched, treat 'A' input as input of ADREG +// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) +// ( 4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// If A2REG, then match 'A' input for A1REG +// ( 5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG +// ( 6) Match 'D' input for DREG +// ( 7) Match 'P' output that exclusively drives an MREG +// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). 
+// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). +// ( 9) Match 'P' output that exclusively drives a PREG +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached +// to the 'P' output where it is compared to a constant that is a +// power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// be used to implement this function +// Notes: +// - The intention of this pattern matcher is for it to be compatible with +// DSP48E1 cells inferred from multiply operations by Yosys, as well as for +// user instantiations that may already contain the cells being packed... 
+// (though the latter is currently untested) +// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each +// *REG match, it has been factored out into two subpatterns: in_dffe and +// out_dffe, located at the bottom of this file + pattern xilinx_dsp_pack state clock @@ -5,12 +55,11 @@ state sigA sigB sigC sigD sigM sigP state postAddAB postAddMuxAB state ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol - state ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux -// subpattern +// Variables used for subpatterns state argQ argD state ffcepol ffrstpol state ffoffset @@ -19,6 +68,7 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell match dsp select dsp->type.in(\DSP48E1) endmatch @@ -53,6 +103,7 @@ code sigA sigB sigC sigD sigM clock } else sigM = P; + // TODO: Check if necessary // This sigM could have no users if downstream $add // is narrower than $mul result, for example if (sigM.empty()) @@ -61,6 +112,10 @@ code sigA sigB sigC sigD sigM clock clock = port(dsp, \CLK, SigBit()); endcode +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed above) +// If matched, treat 'A' input as input of ADREG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock if (param(dsp, \ADREG).as_int() == 0) { argQ = sigA; @@ -81,6 +136,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock } endcode +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +// (pre-adder) match preAdd if sigD.empty() || sigD.is_fully_zero() // Ensure that preAdder not already used @@ -103,6 
+160,7 @@ match preAdd endmatch code sigA sigD + // TODO: Check if this is necessary? if (preAdd) { sigA = port(preAdd, \A); sigD = port(preAdd, \B); @@ -111,6 +169,9 @@ code sigA sigD } endcode +// (4) If pre-adder was present, find match 'A' input for A2REG +// If pre-adder was not present, move ADREG to A2REG +// Then match 'A' input for A1REG code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol // Only search for ffA2 if there was a pre-adder // (otherwise ffA2 would have been matched as ffAD) @@ -173,6 +234,8 @@ ffA1_end: ; } endcode +// (5) Match 'B' input for B2REG +// If B2REG, then match 'B' input for B1REG code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol if (param(dsp, \BREG).as_int() == 0) { argQ = sigB; @@ -222,6 +285,7 @@ ffB1_end: ; } endcode +// (6) Match 'D' input for DREG code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock if (param(dsp, \DREG).as_int() == 0) { argQ = sigD; @@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock } endcode +// (7) Match 'P' output that exclusively drives an MREG code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { argD = sigM; @@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock sigP = sigM; endcode +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +// cell (post-adder). +// The other input to the adder is assumed to come in from the 'C' input +// (note: 'P' -> 'C' connections that exist for accumulators are +// recognised in xilinx_dsp.cc). 
match postAdd // Ensure that Z mux is not already used if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() @@ -291,6 +361,7 @@ code sigC sigP } endcode +// (9) Match 'P' output that exclusively drives a PREG code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock if (param(dsp, \PREG).as_int() == 0) { int users = 2; @@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock } endcode +// (10) If post-adder and PREG both present, match for a $mux cell driving +// the 'C' input, where one of the $mux's inputs is the PREG output. +// This indicates an accumulator situation, and one where a $mux exists +// to override the accumulated value: +// +--------------------------------+ +// | ____ | +// +--| \ | +// |$mux|-+ | +// 'C' ---|____/ | | +// | /-------\ +----+ | +// +----+ +-| post- |___|PREG|---+ 'P' +// |MREG|------ | adder | +----+ +// +----+ \-------/ match postAddMux if postAdd if ffP @@ -333,6 +417,11 @@ code sigC sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); endcode +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +// the 'P' output where it is compared to a constant that is a power-of-2: +// e.g. 
`assign overflow = (PREG >= 2**40);` +// In this scenario, the pattern detector functionality of a DSP48E1 can +// be used to implement this function match overflow if ffP if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a31dc80bf..a20d3cdce 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,3 +1,25 @@ +// This file describes the second of three pattern matcher setups that +// forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +// Notes: +// - Separating out CREG packing is necessary since there is no guarantee +// that the cell ordering corresponds to the "expected" case (i.e. the order +// in which they appear in the source) thus the possibility existed that a +// register got packed as a CREG into a downstream DSP that should have +// otherwise been a PREG of an upstream DSP that had not been visited yet +// - The reason this is separated out from the xilinx_dsp.pmg file is +// for efficiency --- each *.pmg file creates a class of the same basename, +// which when constructed, creates a custom database tailored to the +// pattern(s) contained within. Since the pattern in this file must be +// executed after the pattern contained in xilinx_dsp.pmg, it is necessary +// to reconstruct this database. Separating the two patterns into +// independent files results in two smaller, more specific databases. 
+ pattern xilinx_dsp_packC udata > unextend @@ -15,13 +37,15 @@ udata dffclock udata dff dffcemux dffrstmux udata dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// and (b) uses the 'C' port match dsp select dsp->type.in(\DSP48E1) select param(dsp, \CREG, 1).as_int() == 0 select nusers(port(dsp, \C, SigSpec())) > 1 endmatch -code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock +code sigC sigP unextend = [](const SigSpec &sig) { int i; for (i = GetSize(sig)-1; i > 0; i--) @@ -47,7 +71,14 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock } else sigP = P; +endcode +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +// (attached to at most two $mux cells that implement clock-enable or +// reset functionality, using a subpattern discussed below) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock + // TODO: Any downside to allowing this? + // If this DSP implements an accumulator, do not attempt to match if (sigC == sigP) reject; -- cgit v1.2.3