diff options
| author | Eddie Hung <eddie@fpgeh.com> | 2019-10-08 10:53:30 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-10-08 10:53:30 -0700 | 
| commit | 472b5d33a624c2e414ce733c1cda0b97bce24094 (patch) | |
| tree | 0da9ac82c0ab9a7697d29365d44530f15498635e | |
| parent | 4072a9666339c76fbef19eac307486d48ed0ae28 (diff) | |
| parent | f90a4b1e24e36943a343bd36315b6029dd6cd044 (diff) | |
| download | yosys-472b5d33a624c2e414ce733c1cda0b97bce24094.tar.gz yosys-472b5d33a624c2e414ce733c1cda0b97bce24094.tar.bz2 yosys-472b5d33a624c2e414ce733c1cda0b97bce24094.zip | |
Merge pull request #1438 from YosysHQ/eddie/xilinx_dsp_comments
Add notes and comments for xilinx_dsp
| -rw-r--r-- | passes/pmgen/xilinx_dsp.cc | 14 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp.pmg | 214 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp_CREG.pmg | 81 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp_cascade.pmg | 115 | ||||
| -rw-r--r-- | techlibs/xilinx/synth_xilinx.cc | 12 | 
5 files changed, 364 insertions, 72 deletions
| diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 3ff921957..054e123e4 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -609,8 +609,13 @@ struct XilinxDspPass : public Pass {  		extra_args(args, argidx, design);  		for (auto module : design->selected_modules()) { +			// Experimental feature: pack $add/$sub cells with +			//   (* use_dsp48="simd" *) into DSP48E1's using its +			//   SIMD feature  			xilinx_simd_pack(module, module->selected_cells()); +			// Match for all features ([ABDMP][12]?REG, pre-adder, +			// post-adder, pattern detector, etc.) except for CREG  			{  				xilinx_dsp_pm pm(module, module->selected_cells());  				pm.run_xilinx_dsp_pack(xilinx_dsp_pack); @@ -619,14 +624,17 @@ struct XilinxDspPass : public Pass {  			//   is no guarantee that the cell ordering corresponds  			//   to the "expected" case (i.e. the order in which  			//   they appear in the source) thus the possiblity -			//   existed that a register got packed as CREG into a +			//   existed that a register got packed as a CREG into a  			//   downstream DSP that should have otherwise been a -			//   PREG of an upstream DSP that had not been pattern -			//   matched yet +			//   PREG of an upstream DSP that had not been visited +			//   yet  			{  				xilinx_dsp_CREG_pm pm(module, module->selected_cells());  				pm.run_xilinx_dsp_packC(xilinx_dsp_packC);  			} +			// Lastly, identify and utilise PCOUT -> PCIN, +			//   ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade +			//   chains  			{  				xilinx_dsp_cascade_pm pm(module, module->selected_cells());  				pm.run_xilinx_dsp_cascade(); diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg index 4e174e753..604aa222b 100644 --- a/passes/pmgen/xilinx_dsp.pmg +++ b/passes/pmgen/xilinx_dsp.pmg @@ -1,3 +1,57 @@ +// This file describes the main pattern matcher setup (of three total) that +//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, 
it works as follows: +//   ( 1) Starting from a DSP48E1 cell +//   ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +//        (attached to at most two $mux cells that implement clock-enable or +//         reset functionality, using a subpattern discussed below) +//        If ADREG matched, treat 'A' input as input of ADREG +//   ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +//       (pre-adder) +//   ( 4) If pre-adder was present, find match 'A' input for A2REG +//        If pre-adder was not present, move ADREG to A2REG +//        If A2REG, then match 'A' input for A1REG +//   ( 5) Match 'B' input for B2REG +//        If B2REG, then match 'B' input for B1REG +//   ( 6) Match 'D' input for DREG +//   ( 7) Match 'P' output that exclusively drives an MREG +//   ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +//        cell (post-adder). +//        The other input to the adder is assumed to come in from the 'C' input +//        (note: 'P' -> 'C' connections that exist for accumulators are +//         recognised in xilinx_dsp.cc). +//   ( 9) Match 'P' output that exclusively drives a PREG +//   (10) If post-adder and PREG both present, match for a $mux cell driving +//        the 'C' input, where one of the $mux's inputs is the PREG output. 
+//        This indicates an accumulator situation, and one where a $mux exists +//        to override the accumulated value: +//             +--------------------------------+ +//             |   ____                         | +//             +--|    \                        | +//                |$mux|-+                      | +//         'C' ---|____/ |                      | +//                       | /-------\   +----+   | +//            +----+     +-| post- |___|PREG|---+ 'P' +//            |MREG|------ | adder |   +----+ +//            +----+       \-------/ +//   (11) If PREG present, match for a greater-than-or-equal $ge cell attached +//        to the 'P' output where it is compared to a constant that is a +//        power-of-2: e.g. `assign overflow = (PREG >= 2**40);` +//        In this scenario, the pattern detector functionality of a DSP48E1 can +//        be used to implement this function +// Notes: +//   - The intention of this pattern matcher is for it to be compatible with +//     DSP48E1 cells inferred from multiply operations by Yosys, as well as for +//     user instantiations that may already contain the cells being packed... +//     (though the latter is currently untested) +//   - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used +//     for each *REG match, it has been factored out into two subpatterns: +//     in_dffe and out_dffe located at the bottom of this file. +//   - Matching for pattern detector features is currently incomplete. For +//     example, matching for underflow as well as overflow detection is +//     possible, as would be auto-reset, enabling saturated arithmetic, detecting +//     custom patterns, etc. 
+  pattern xilinx_dsp_pack  state <SigBit> clock @@ -5,12 +59,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP  state <IdString> postAddAB postAddMuxAB  state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol  state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol -  state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux  state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux  state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux -// subpattern +// Variables used for subpatterns  state <SigSpec> argQ argD  state <bool> ffcepol ffrstpol  state <int> ffoffset @@ -19,6 +72,7 @@ udata <SigBit> dffclock  udata <Cell*> dff dffcemux dffrstmux  udata <bool> dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell  match dsp  	select dsp->type.in(\DSP48E1)  endmatch @@ -50,17 +104,21 @@ code sigA sigB sigC sigD sigM clock  			sigM.append(P[i]);  		}  		log_assert(nusers(P.extract_end(i)) <= 1); +		// This sigM could have no users if downstream sinks (e.g. 
$add) is +		//   narrower than $mul result, for example +		if (sigM.empty()) +			reject;  	}  	else  		sigM = P; -	// This sigM could have no users if downstream $add -	//   is narrower than $mul result, for example -	if (sigM.empty()) -		reject;  	clock = port(dsp, \CLK, SigBit());  endcode +// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG) +//     (attached to at most two $mux cells that implement clock-enable or +//      reset functionality, using a subpattern discussed above) +//     If matched, treat 'A' input as input of ADREG  code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock  	if (param(dsp, \ADREG).as_int() == 0) {  		argQ = sigA; @@ -81,6 +139,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock  	}  endcode +// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell +//     (pre-adder)  match preAdd  	if sigD.empty() || sigD.is_fully_zero()  	// Ensure that preAdder not already used @@ -106,11 +166,12 @@ code sigA sigD  	if (preAdd) {  		sigA = port(preAdd, \A);  		sigD = port(preAdd, \B); -		if (GetSize(sigA) < GetSize(sigD)) -			std::swap(sigA, sigD);  	}  endcode +// (4) If pre-adder was present, find match 'A' input for A2REG +//     If pre-adder was not present, move ADREG to A2REG +//     Then match 'A' input for A1REG  code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol  	// Only search for ffA2 if there was a pre-adder  	//   (otherwise ffA2 would have been matched as ffAD) @@ -173,6 +234,8 @@ ffA1_end:		;  	}  endcode +// (5) Match 'B' input for B2REG +//     If B2REG, then match 'B' input for B1REG  code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol  	if (param(dsp, \BREG).as_int() == 0) {  		argQ = sigB; @@ -222,6 +285,7 @@ ffB1_end:				;  	}  endcode +// (6) Match 'D' input for DREG  code argQ ffD ffDcemux ffDrstmux 
ffDcepol ffDrstpol sigD clock  	if (param(dsp, \DREG).as_int() == 0) {  		argQ = sigD; @@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock  	}  endcode +// (7) Match 'P' output that exclusively drives an MREG  code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock  	if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {  		argD = sigM; @@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock  	sigP = sigM;  endcode +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +//     cell (post-adder). +//     The other input to the adder is assumed to come in from the 'C' input +//     (note: 'P' -> 'C' connections that exist for accumulators are +//      recognised in xilinx_dsp.cc).  match postAdd  	// Ensure that Z mux is not already used  	if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() @@ -291,6 +361,7 @@ code sigC sigP  	}  endcode +// (9) Match 'P' output that exclusively drives a PREG  code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock  	if (param(dsp, \PREG).as_int() == 0) {  		int users = 2; @@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock  	}  endcode +// (10) If post-adder and PREG both present, match for a $mux cell driving +//      the 'C' input, where one of the $mux's inputs is the PREG output. 
+//      This indicates an accumulator situation, and one where a $mux exists +//      to override the accumulated value: +//           +--------------------------------+ +//           |   ____                         | +//           +--|    \                        | +//              |$mux|-+                      | +//       'C' ---|____/ |                      | +//                     | /-------\   +----+   | +//          +----+     +-| post- |___|PREG|---+ 'P' +//          |MREG|------ | adder |   +----+ +//          +----+       \-------/  match postAddMux  	if postAdd  	if ffP @@ -333,6 +417,11 @@ code sigC  		sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);  endcode +// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to +//      the 'P' output where it is compared to a constant that is a power-of-2: +//      e.g. `assign overflow = (PREG >= 2**40);` +//      In this scenario, the pattern detector functionality of a DSP48E1 can +//      to implement this function  match overflow  	if ffP  	if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" @@ -351,22 +440,45 @@ endcode  // ####################### +// Subpattern for matching against input registers, based on knowledge of the +//   'Q' input. Typically, identifying registers with clock-enable and reset +//   capability would be a task would be handled by other Yosys passes such as +//   dff2dffe, but since DSP inference happens much before this, these patterns +//   have to be manually identified. 
+// At a high level: +//   (1) Starting from a $dff cell that (partially or fully) drives the given +//       'Q' argument +//   (2) Match for a $mux cell implementing synchronous reset semantics --- +//       one that exclusively drives the 'D' input of the $dff, with one of its +//       $mux inputs being fully zero +//   (3) Match for a $mux cell implement clock enable semantics --- one that +//       exclusively drives the 'D' input of the $dff (or the other input of +//       the reset $mux) and where one of this $mux's inputs is connected to +//       the 'Q' output of the $dff  subpattern in_dffe  arg argD argQ clock  code  	dff = nullptr; -	for (auto c : argQ.chunks()) { +	for (const auto &c : argQ.chunks()) { +		// Abandon matches when 'Q' is a constant  		if (!c.wire)  			reject; +		// Abandon matches when 'Q' has the keep attribute set  		if (c.wire->get_bool_attribute(\keep))  			reject; -		Const init = c.wire->attributes.at(\init, State::Sx); -		if (!init.is_fully_undef() && !init.is_fully_zero()) -			reject; +		// Abandon matches when 'Q' has a non-zero init attribute set +		// (not supported by DSP48E1) +		Const init = c.wire->attributes.at(\init, Const()); +		if (!init.empty()) +			for (auto b : init.extract(c.offset, c.width)) +				if (b != State::Sx && b != State::S0) +					reject;  	}  endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +//     'Q' argument  match ff  	select ff->type.in($dff)  	// DSP48E1 does not support clock inversion @@ -379,14 +491,12 @@ match ff  	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)  	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ +	filter clock == SigBit() || port(ff, \CLK) == clock +  	set ffoffset offset  endmatch  code argQ argD -{ -	if (clock != SigBit() && port(ff, \CLK) != clock) -		reject; -  	SigSpec Q = port(ff, \Q);  	dff = ff;  	dffclock = port(ff, \CLK); @@ -398,9 +508,11 @@ code argQ argD  	//   has two (ff, ffrstmux) users  	if (nusers(dffD) > 
2)  		argD = SigSpec(); -}  endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +//     exclusively drives the 'D' input of the $dff, with one of the $mux +//     inputs being fully zero  match ffrstmux  	if !argD.empty()  	select ffrstmux->type.in($mux) @@ -432,6 +544,10 @@ code argD  		dffrstmux = nullptr;  endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +//     exclusively drives the 'D' input of the $dff (or the other input of +//     the reset $mux) and where one of this $mux's inputs is connected to +//     the 'Q' output of the $dff  match ffcemux  	if !argD.empty()  	select ffcemux->type.in($mux) @@ -456,16 +572,32 @@ endcode  // ####################### +// Subpattern for matching against output registers, based on knowledge of the +//   'D' input. +// At a high level: +//   (1) Starting from an optional $mux cell that implements clock enable +//       semantics --- one where the given 'D' argument (partially or fully) +//       drives one of its two inputs +//   (2) Starting from, or continuing onto, another optional $mux cell that +//       implements synchronous reset semantics --- one where the given 'D' +//       argument (or the clock enable $mux output) drives one of its two inputs +//       and where the other input is fully zero +//   (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +//       output of the previous clock enable or reset $mux cells)  subpattern out_dffe  arg argD argQ clock  code  	dff = nullptr;  	for (auto c : argD.chunks()) +		// Abandon matches when 'D' has the keep attribute set  		if (c.wire->get_bool_attribute(\keep))  			reject;  endcode +// (1) Starting from an optional $mux cell that implements clock enable +//     semantics --- one where the given 'D' argument (partially or fully) +//     drives one of its two inputs  match ffcemux  	select ffcemux->type.in($mux)  	// ffcemux output must have two users: ffcemux and ff.D @@ -504,6 
+636,10 @@ code argD argQ  	}  endcode +// (2) Starting from, or continuing onto, another optional $mux cell that +//     implements synchronous reset semantics --- one where the given 'D' +//     argument (or the clock enable $mux output) drives one of its two inputs +//     and where the other input is fully zero  match ffrstmux  	select ffrstmux->type.in($mux)  	// ffrstmux output must have two users: ffrstmux and ff.D @@ -542,6 +678,8 @@ code argD argQ  	}  endcode +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +//     output of the previous clock enable or reset $mux cells)  match ff  	select ff->type.in($dff)  	// DSP48E1 does not support clock inversion @@ -558,32 +696,30 @@ match ff  	// Check that FF.Q is connected to CE-mux  	filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ +	filter clock == SigBit() || port(ff, \CLK) == clock +  	set ffoffset offset  endmatch  code argQ -	if (ff) { -		if (clock != SigBit() && port(ff, \CLK) != clock) -			reject; - -		SigSpec D = port(ff, \D); -		SigSpec Q = port(ff, \Q); -		if (!ffcemux) { -			argQ = argD; -			argQ.replace(D, Q); -		} - -		for (auto c : argQ.chunks()) { -			Const init = c.wire->attributes.at(\init, State::Sx); -			if (!init.is_fully_undef() && !init.is_fully_zero()) -				reject; -		} +	SigSpec D = port(ff, \D); +	SigSpec Q = port(ff, \Q); +	if (!ffcemux) { +		argQ = argD; +		argQ.replace(D, Q); +	} -		dff = ff; -		dffQ = argQ; -		dffclock = port(ff, \CLK); +	// Abandon matches when 'Q' has a non-zero init attribute set +	// (not supported by DSP48E1) +	for (auto c : argQ.chunks()) { +		Const init = c.wire->attributes.at(\init, Const()); +		if (!init.empty()) +			for (auto b : init.extract(c.offset, c.width)) +				if (b != State::Sx && b != State::S0) +					reject;  	} -	// No enable/reset mux possible without flop -	else if (dffcemux || dffrstmux) -		reject; + +	dff = ff; +	dffQ = argQ; +	dffclock = port(ff, \CLK);  endcode diff --git 
a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index a31dc80bf..a57043009 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,3 +1,26 @@ +// This file describes the second of three pattern matcher setups that +//   form the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +//   (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +//       and (b) uses the 'C' port +//   (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +//       (attached to at most two $mux cells that implement clock-enable or +//        reset functionality, using a subpattern discussed below) +// Notes: +//   - Running CREG packing after xilinx_dsp_pack is necessary since there is no +//     guarantee that the cell ordering corresponds to the "expected" case (i.e. +//     the order in which they appear in the source) thus the possibility existed +//     that a register got packed as a CREG into a downstream DSP that should +//     have otherwise been a PREG of an upstream DSP that had not been visited +//     yet +//   - The reason this is separated out from the xilinx_dsp.pmg file is +//     for efficiency --- each *.pmg file creates a class of the same basename, +//     which when constructed, creates a custom database tailored to the +//     pattern(s) contained within. Since the pattern in this file must be +//     executed after the pattern contained in xilinx_dsp.pmg, it is necessary +//     to reconstruct this database. Separating the two patterns into +//     independent files yields two smaller, more specific, databases. 
+  pattern xilinx_dsp_packC  udata <std::function<SigSpec(const SigSpec&)>> unextend @@ -6,7 +29,7 @@ state <SigSpec> sigC sigP  state <bool> ffCcepol ffCrstpol  state <Cell*> ffC ffCcemux ffCrstmux -// subpattern +// Variables used for subpatterns  state <SigSpec> argQ argD  state <bool> ffcepol ffrstpol  state <int> ffoffset @@ -15,13 +38,15 @@ udata <SigBit> dffclock  udata <Cell*> dff dffcemux dffrstmux  udata <bool> dffcepol dffrstpol +// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +//     and (b) uses the 'C' port  match dsp  	select dsp->type.in(\DSP48E1)  	select param(dsp, \CREG, 1).as_int() == 0  	select nusers(port(dsp, \C, SigSpec())) > 1  endmatch -code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock +code sigC sigP clock  	unextend = [](const SigSpec &sig) {  		int i;  		for (i = GetSize(sig)-1; i > 0; i--) @@ -48,11 +73,13 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock  	else  		sigP = P; -	if (sigC == sigP) -		reject; -  	clock = port(dsp, \CLK, SigBit()); +endcode +// (2) Match the driver of the 'C' input to a possible $dff cell (CREG) +//     (attached to at most two $mux cells that implement clock-enable or +//      reset functionality, using the in_dffe subpattern) +code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock  	argQ = sigC;  	subpattern(in_dffe);  	if (dff) { @@ -77,22 +104,44 @@ endcode  // ####################### +// Subpattern for matching against input registers, based on knowledge of the +//   'Q' input. Typically, identifying registers with clock-enable and reset +//   capability would be a task would be handled by other Yosys passes such as +//   dff2dffe, but since DSP inference happens much before this, these patterns +//   have to be manually identified. 
+// At a high level: +//   (1) Starting from a $dff cell that (partially or fully) drives the given +//       'Q' argument +//   (2) Match for a $mux cell implementing synchronous reset semantics --- +//       one that exclusively drives the 'D' input of the $dff, with one of its +//       $mux inputs being fully zero +//   (3) Match for a $mux cell implement clock enable semantics --- one that +//       exclusively drives the 'D' input of the $dff (or the other input of +//       the reset $mux) and where one of this $mux's inputs is connected to +//       the 'Q' output of the $dff  subpattern in_dffe  arg argD argQ clock  code  	dff = nullptr; -	for (auto c : argQ.chunks()) { +	for (const auto &c : argQ.chunks()) { +		// Abandon matches when 'Q' is a constant  		if (!c.wire)  			reject; +		// Abandon matches when 'Q' has the keep attribute set  		if (c.wire->get_bool_attribute(\keep))  			reject; -		Const init = c.wire->attributes.at(\init, State::Sx); -		if (!init.is_fully_undef() && !init.is_fully_zero()) -			reject; +		// Abandon matches when 'Q' has a non-zero init attribute set +		// (not supported by DSP48E1) +		Const init = c.wire->attributes.at(\init, Const()); +		for (auto b : init.extract(c.offset, c.width)) +			if (b != State::Sx && b != State::S0) +				reject;  	}  endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +//     'Q' argument  match ff  	select ff->type.in($dff)  	// DSP48E1 does not support clock inversion @@ -105,14 +154,12 @@ match ff  	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)  	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ +	filter clock == SigBit() || port(ff, \CLK) == clock +  	set ffoffset offset  endmatch  code argQ argD -{ -	if (clock != SigBit() && port(ff, \CLK) != clock) -		reject; -  	SigSpec Q = port(ff, \Q);  	dff = ff;  	dffclock = port(ff, \CLK); @@ -124,9 +171,11 @@ code argQ argD  	//   has two (ff, ffrstmux) users  	if (nusers(dffD) > 2)  		argD = SigSpec(); -} 
 endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +//     exclusively drives the 'D' input of the $dff, with one of the $mux +//     inputs being fully zero  match ffrstmux  	if !argD.empty()  	select ffrstmux->type.in($mux) @@ -158,6 +207,10 @@ code argD  		dffrstmux = nullptr;  endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +//     exclusively drives the 'D' input of the $dff (or the other input of +//     the reset $mux) and where one of this $mux's inputs is connected to +//     the 'Q' output of the $dff  match ffcemux  	if !argD.empty()  	select ffcemux->type.in($mux) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 6f4ac5849..7a32df2b7 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -1,3 +1,46 @@ +// This file describes the third of three pattern matcher setups that +//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc +// At a high level, it works as follows: +//   (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +//       (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +//       use the 'PCOUT' port +//   (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +//         (b) has its Z multiplexer output set to the 'C' port, which is +//         driven by the 'P' output of the previous DSP cell, and (c) has its +//         'PCIN' port unused +//   (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +//         previous DSP cell right-shifted by 17 bits +//   (3) For this subequent DSP48E1 match (i.e. 
PCOUT -> PCIN cascade exists) +//       if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +//       DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +//       have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +//       use its ACOUT port, then examine if an ACOUT -> ACIN cascade +//       opportunity exists by matching for a $dff-with-optional-clock-enable- +//       or-reset and checking that the 'D' input of this register is the same +//       as the 'A' input of the previous DSP +//   (4) Same as (3) but for BCOUT -> BCIN cascade +//   (5) Recursively go to (2.1) until no more matches possible, keeping track +//       of the longest possible chain found +//   (6) The longest chain is then divided into chunks of no more than +//       MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +//       height of a DSP column) with each DSP in each chunk being rewritten +//       to use [ABP]COUT -> [ABP]CIN cascading as appropriate +// Notes: +//   - Currently, [AB]COUT -> [AB]CIN cascades (3 or 4) are only considered +//     if a PCOUT -> PCIN cascade (2.1 or 2.2) is first identified; this need +//     not be the case --- [AB] cascades can exist independently of a P cascade +//     (though all three cascades must come from the same DSP). This situation +//     is not handled currently. +//   - In addition, [AB]COUT -> [AB]CIN cascades (3 or 4) are currently +//     conservative in that they examine the situation where (a) the previous +//     DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no +//     registers enabled, and (c) that there exists only one additional register +//     between the upstream and downstream DSPs. This can certainly be relaxed +//     to identify situations ranging from (i) neither DSP uses any registers, +//     to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and +//     there exist a further 2 registers between them. 
This remains a TODO +//     item. +  pattern xilinx_dsp_cascade  udata <std::function<SigSpec(const SigSpec&)>> unextend @@ -6,7 +49,7 @@ state <Cell*> next  state <SigSpec> clock  state <int> AREG BREG -// subpattern +// Variables used for subpatterns  state <SigSpec> argQ argD  state <bool> ffcepol ffrstpol  state <int> ffoffset @@ -19,12 +62,19 @@ code  #define MAX_DSP_CASCADE 20  endcode +// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer +//     (controlled by OPMODE[6:4]) set to zero and (b) doesn't already +//     use the 'PCOUT' port  match first  	select first->type.in(\DSP48E1)  	select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")  	select nusers(port(first, \PCOUT, SigSpec())) <= 1  endmatch +// (6) The longest chain is then divided into chunks of no more than +//     MAX_DSP_CASCADE in length (to prevent long cascades that exceed the +//     height of a DSP column) with each DSP in each chunk being rewritten +//     to use [ABP]COUT -> [ABP]CIN cascading as appropriate  code  	longest_chain.clear();  	chain.emplace_back(first, -1, -1, -1); @@ -106,6 +156,10 @@ subpattern tail  arg first  arg next +// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +//       (b) has its Z multiplexer output set to the 'C' port, which is +//       driven by the 'P' output of the previous DSP cell, and (c) has its +//       'PCIN' port unused  match nextP  	select nextP->type.in(\DSP48E1)  	select !param(nextP, \CREG, State::S1).as_bool() @@ -116,6 +170,8 @@ match nextP  	semioptional  endmatch +// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the +//       previous DSP cell right-shifted by 17 bits  match nextP_shift17  	if !nextP  	select nextP_shift17->type.in(\DSP48E1) @@ -145,6 +201,14 @@ code next  	}  endcode +// (3) For this subequent DSP48E1 match (i.e. 
PCOUT -> PCIN cascade exists) +//     if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this +//     DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already +//     have an ACOUT -> ACIN cascade, (d) the previous DSP does not already +//     use its ACOUT port, then examine if an ACOUT -> ACIN cascade +//     opportunity exists by matching for a $dff-with-optional-clock-enable- +//     or-reset and checking that the 'D' input of this register is the same +//     as the 'A' input of the previous DSP  code argQ clock AREG  	AREG = -1;  	if (next) { @@ -152,7 +216,6 @@ code argQ clock AREG  		if (param(prev, \AREG, 2).as_int() > 0 &&  				param(next, \AREG, 2).as_int() > 0 &&  				param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && -				port(next, \ACIN, SigSpec()).is_fully_zero() &&  				nusers(port(prev, \ACOUT, SigSpec())) <= 1) {  			argQ = unextend(port(next, \A));  			clock = port(prev, \CLK); @@ -174,6 +237,7 @@ reject_AREG:			;  	}  endcode +// (4) Same as (3) but for BCOUT -> BCIN cascade  code argQ clock BREG  	BREG = -1;  	if (next) { @@ -203,13 +267,14 @@ reject_BREG:			;  	}  endcode +// (5) Recursively go to (2.1) until no more matches possible, recording the +//     longest possible chain  code  	if (next) {  		chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);  		SigSpec sigC = unextend(port(next, \C)); -		// TODO: Cannot use 'reject' since semioptional  		if (nextP_shift17) {  			if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&  					port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC) @@ -232,22 +297,44 @@ endcode  // ####################### +// Subpattern for matching against input registers, based on knowledge of the +//   'Q' input. 
Typically, identifying registers with clock-enable and reset +//   capability would be a task would be handled by other Yosys passes such as +//   dff2dffe, but since DSP inference happens much before this, these patterns +//   have to be manually identified. +// At a high level: +//   (1) Starting from a $dff cell that (partially or fully) drives the given +//       'Q' argument +//   (2) Match for a $mux cell implementing synchronous reset semantics --- +//       one that exclusively drives the 'D' input of the $dff, with one of its +//       $mux inputs being fully zero +//   (3) Match for a $mux cell implement clock enable semantics --- one that +//       exclusively drives the 'D' input of the $dff (or the other input of +//       the reset $mux) and where one of this $mux's inputs is connected to +//       the 'Q' output of the $dff  subpattern in_dffe  arg argD argQ clock  code  	dff = nullptr; -	for (auto c : argQ.chunks()) { +	for (const auto &c : argQ.chunks()) { +		// Abandon matches when 'Q' is a constant  		if (!c.wire)  			reject; +		// Abandon matches when 'Q' has the keep attribute set  		if (c.wire->get_bool_attribute(\keep))  			reject; -		Const init = c.wire->attributes.at(\init, State::Sx); -		if (!init.is_fully_undef() && !init.is_fully_zero()) -			reject; +		// Abandon matches when 'Q' has a non-zero init attribute set +		// (not supported by DSP48E1) +		Const init = c.wire->attributes.at(\init, Const()); +		for (auto b : init.extract(c.offset, c.width)) +			if (b != State::Sx && b != State::S0) +				reject;  	}  endcode +// (1) Starting from a $dff cell that (partially or fully) drives the given +//     'Q' argument  match ff  	select ff->type.in($dff)  	// DSP48E1 does not support clock inversion @@ -260,14 +347,12 @@ match ff  	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)  	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ +	filter clock == SigBit() || port(ff, \CLK) == clock +  	set ffoffset offset  endmatch  code argQ 
argD -{ -	if (clock != SigBit() && port(ff, \CLK) != clock) -		reject; -  	SigSpec Q = port(ff, \Q);  	dff = ff;  	dffclock = port(ff, \CLK); @@ -279,9 +364,11 @@ code argQ argD  	//   has two (ff, ffrstmux) users  	if (nusers(dffD) > 2)  		argD = SigSpec(); -}  endcode +// (2) Match for a $mux cell implementing synchronous reset semantics --- +//     exclusively drives the 'D' input of the $dff, with one of the $mux +//     inputs being fully zero  match ffrstmux  	if !argD.empty()  	select ffrstmux->type.in($mux) @@ -313,6 +400,10 @@ code argD  		dffrstmux = nullptr;  endcode +// (3) Match for a $mux cell implement clock enable semantics --- one that +//     exclusively drives the 'D' input of the $dff (or the other input of +//     the reset $mux) and where one of this $mux's inputs is connected to +//     the 'Q' output of the $dff  match ffcemux  	if !argD.empty()  	select ffcemux->type.in($mux) diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc index 41429b338..4fe287744 100644 --- a/techlibs/xilinx/synth_xilinx.cc +++ b/techlibs/xilinx/synth_xilinx.cc @@ -342,10 +342,14 @@ struct SynthXilinxPass : public ScriptPass  		if (check_label("map_dsp", "(skip if '-nodsp')")) {  			if (!nodsp || help_mode) {  				// NB: Xilinx multipliers are signed only -				run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 " -						"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers -						"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller -						"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18"); +				run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 " +					"-D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 "    // Partial multipliers are intentionally +												// limited to 18x18 in order to take +												// advantage of the (PCOUT << 17) -> PCIN +												// dedicated cascade chain capability +			
		"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers +					"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller +					"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18");  				run("select a:mul2dsp");  				run("setattr -unset mul2dsp");  				run("opt_expr -fine"); | 
