diff options
Diffstat (limited to 'passes/pmgen')
| -rw-r--r-- | passes/pmgen/Makefile.inc | 3 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp.cc | 212 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp48a.pmg | 673 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp_CREG.pmg | 9 | ||||
| -rw-r--r-- | passes/pmgen/xilinx_dsp_cascade.pmg | 193 | 
5 files changed, 1012 insertions, 78 deletions
| diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index 145d2ebf9..1a57bef7d 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -22,8 +22,9 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h))  # --------------------------------------  OBJS += passes/pmgen/xilinx_dsp.o -passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h +passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp48a_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h  $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h)) +$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp48a_pm.h))  $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_CREG_pm.h))  $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h)) diff --git a/passes/pmgen/xilinx_dsp.cc b/passes/pmgen/xilinx_dsp.cc index 054e123e4..81c3c57c4 100644 --- a/passes/pmgen/xilinx_dsp.cc +++ b/passes/pmgen/xilinx_dsp.cc @@ -26,6 +26,7 @@ USING_YOSYS_NAMESPACE  PRIVATE_NAMESPACE_BEGIN  #include "passes/pmgen/xilinx_dsp_pm.h" +#include "passes/pmgen/xilinx_dsp48a_pm.h"  #include "passes/pmgen/xilinx_dsp_CREG_pm.h"  #include "passes/pmgen/xilinx_dsp_cascade_pm.h" @@ -487,6 +488,190 @@ void xilinx_dsp_pack(xilinx_dsp_pm &pm)  	pm.blacklist(cell);  } +void xilinx_dsp48a_pack(xilinx_dsp48a_pm &pm) +{ +	auto &st = pm.st_xilinx_dsp48a_pack; + +	log("Analysing %s.%s for Xilinx DSP48A/DSP48A1 packing.\n", log_id(pm.module), log_id(st.dsp)); + +	log_debug("preAdd:     %s\n", log_id(st.preAdd, "--")); +	log_debug("ffA1:       %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--")); +	log_debug("ffA0:       %s %s %s\n", log_id(st.ffA0, "--"), log_id(st.ffA0cemux, "--"), log_id(st.ffA0rstmux, "--")); +	log_debug("ffB1:       %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--")); +	
log_debug("ffB0:       %s %s %s\n", log_id(st.ffB0, "--"), log_id(st.ffB0cemux, "--"), log_id(st.ffB0rstmux, "--")); +	log_debug("ffD:        %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--")); +	log_debug("dsp:        %s\n", log_id(st.dsp, "--")); +	log_debug("ffM:        %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--")); +	log_debug("postAdd:    %s\n", log_id(st.postAdd, "--")); +	log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--")); +	log_debug("ffP:        %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--")); + +	Cell *cell = st.dsp; +	SigSpec &opmode = cell->connections_.at(ID(OPMODE)); + +	if (st.preAdd) { +		log("  preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type)); +		bool D_SIGNED = st.preAdd->getParam(ID(A_SIGNED)).as_bool(); +		bool B_SIGNED = st.preAdd->getParam(ID(B_SIGNED)).as_bool(); +		st.sigB.extend_u0(18, B_SIGNED); +		st.sigD.extend_u0(18, D_SIGNED); +		cell->setPort(ID(B), st.sigB); +		cell->setPort(ID(D), st.sigD); +		opmode[4] = State::S1; +		if (st.preAdd->type == ID($add)) +			opmode[6] = State::S0; +		else if (st.preAdd->type == ID($sub)) +			opmode[6] = State::S1; +		else +			log_assert(!"strange pre-adder type"); + +		pm.autoremove(st.preAdd); +	} +	if (st.postAdd) { +		log("  postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type)); + +		if (st.postAddMux) { +			log_assert(st.ffP); +			opmode[2] = st.postAddMux->getPort(ID(S)); +			pm.autoremove(st.postAddMux); +		} +		else if (st.ffP && st.sigC == st.sigP) +			opmode[2] = State::S0; +		else +			opmode[2] = State::S1; +		opmode[3] = State::S1; + +		if (opmode[2] != State::S0) { +			if (st.postAddMuxAB == ID(A)) +				st.sigC.extend_u0(48, st.postAdd->getParam(ID(B_SIGNED)).as_bool()); +			else +				st.sigC.extend_u0(48, st.postAdd->getParam(ID(A_SIGNED)).as_bool()); +			cell->setPort(ID(C), st.sigC); +		} + +		pm.autoremove(st.postAdd); +	} + +	
if (st.clock != SigBit()) +	{ +		cell->setPort(ID(CLK), st.clock); + +		auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { +			SigSpec D = ff->getPort(ID(D)); +			SigSpec Q = pm.sigmap(ff->getPort(ID(Q))); +			if (!A.empty()) +				A.replace(Q, D); +			if (rstmux) { +				SigSpec Y = rstmux->getPort(ID(Y)); +				SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B)); +				if (!A.empty()) +					A.replace(Y, AB); +				if (rstport != IdString()) { +					SigSpec S = rstmux->getPort(ID(S)); +					cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); +				} +			} +			else if (rstport != IdString()) +				cell->setPort(rstport, State::S0); +			if (cemux) { +				SigSpec Y = cemux->getPort(ID(Y)); +				SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A)); +				SigSpec S = cemux->getPort(ID(S)); +				if (!A.empty()) +					A.replace(Y, BA); +				cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S)); +			} +			else +				cell->setPort(ceport, State::S1); + +			for (auto c : Q.chunks()) { +				auto it = c.wire->attributes.find(ID(init)); +				if (it == c.wire->attributes.end()) +					continue; +				for (int i = c.offset; i < c.offset+c.width; i++) { +					log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx); +					it->second[i] = State::Sx; +				} +			} +		}; + +		if (st.ffA0 || st.ffA1) { +			SigSpec A = cell->getPort(ID(A)); +			if (st.ffA1) { +				f(A, st.ffA1, st.ffA1cemux, st.ffAcepol, ID(CEA), st.ffA1rstmux, st.ffArstpol, ID(RSTA)); +				cell->setParam(ID(A1REG), 1); +			} +			if (st.ffA0) { +				f(A, st.ffA0, st.ffA0cemux, st.ffAcepol, ID(CEA), st.ffA0rstmux, st.ffArstpol, ID(RSTA)); +				cell->setParam(ID(A0REG), 1); +			} +			pm.add_siguser(A, cell); +			cell->setPort(ID(A), A); +		} +		if (st.ffB0 || st.ffB1) { +			SigSpec B = cell->getPort(ID(B)); +			if (st.ffB1) { +				f(B, st.ffB1, st.ffB1cemux, st.ffBcepol, ID(CEB), st.ffB1rstmux, st.ffBrstpol, ID(RSTB)); +				
cell->setParam(ID(B1REG), 1); +			} +			if (st.ffB0) { +				f(B, st.ffB0, st.ffB0cemux, st.ffBcepol, ID(CEB), st.ffB0rstmux, st.ffBrstpol, ID(RSTB)); +				cell->setParam(ID(B0REG), 1); +			} +			pm.add_siguser(B, cell); +			cell->setPort(ID(B), B); +		} +		if (st.ffD) { +			SigSpec D = cell->getPort(ID(D)); +			f(D, st.ffD, st.ffDcemux, st.ffDcepol, ID(CED), st.ffDrstmux, st.ffDrstpol, ID(RSTD)); +			pm.add_siguser(D, cell); +			cell->setPort(ID(D), D); +			cell->setParam(ID(DREG), 1); +		} +		if (st.ffM) { +			SigSpec M; // unused +			f(M, st.ffM, st.ffMcemux, st.ffMcepol, ID(CEM), st.ffMrstmux, st.ffMrstpol, ID(RSTM)); +			st.ffM->connections_.at(ID(Q)).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM))); +			cell->setParam(ID(MREG), State::S1); +		} +		if (st.ffP) { +			SigSpec P; // unused +			f(P, st.ffP, st.ffPcemux, st.ffPcepol, ID(CEP), st.ffPrstmux, st.ffPrstpol, ID(RSTP)); +			st.ffP->connections_.at(ID(Q)).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP))); +			cell->setParam(ID(PREG), State::S1); +		} + +		log("  clock: %s (%s)", log_signal(st.clock), "posedge"); + +		if (st.ffA0) +			log(" ffA0:%s", log_id(st.ffA0)); +		if (st.ffA1) +			log(" ffA1:%s", log_id(st.ffA1)); + +		if (st.ffB0) +			log(" ffB0:%s", log_id(st.ffB0)); +		if (st.ffB1) +			log(" ffB1:%s", log_id(st.ffB1)); + +		if (st.ffD) +			log(" ffD:%s", log_id(st.ffD)); + +		if (st.ffM) +			log(" ffM:%s", log_id(st.ffM)); + +		if (st.ffP) +			log(" ffP:%s", log_id(st.ffP)); +	} +	log("\n"); + +	SigSpec P = st.sigP; +	if (GetSize(P) < 48) +		P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); +	cell->setPort(ID(P), P); + +	pm.blacklist(cell); +} +  void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm)  {  	auto &st = pm.st_xilinx_dsp_packC; @@ -592,33 +777,48 @@ struct XilinxDspPass : public Pass {  		log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n");  		log("into using the DSP48E1's pattern detector feature for overflow detection.\n");  		
log("\n"); +		log("    -family {xcup|xcu|xc7|xc6v|xc5v|xc4v|xc6s|xc3sda}\n"); +		log("        select the family to target\n"); +		log("        default: xc7\n"); +		log("\n");  	}  	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE  	{  		log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n"); +		std::string family = "xc7";  		size_t argidx;  		for (argidx = 1; argidx < args.size(); argidx++)  		{ -			// if (args[argidx] == "-singleton") { -			// 	singleton_mode = true; -			// 	continue; -			// } +			if ((args[argidx] == "-family" || args[argidx] == "-arch") && argidx+1 < args.size()) { +				family = args[++argidx]; +				continue; +			}  			break;  		}  		extra_args(args, argidx, design); +		// Don't bother distinguishing between those. +		if (family == "xc6v") +			family = "xc7"; +		if (family == "xcup") +			family = "xcu"; +  		for (auto module : design->selected_modules()) {  			// Experimental feature: pack $add/$sub cells with  			//   (* use_dsp48="simd" *) into DSP48E1's using its  			//   SIMD feature -			xilinx_simd_pack(module, module->selected_cells()); +			if (family == "xc7") +				xilinx_simd_pack(module, module->selected_cells());  			// Match for all features ([ABDMP][12]?REG, pre-adder,  			// post-adder, pattern detector, etc.) 
except for CREG -			{ +			if (family == "xc7") {  				xilinx_dsp_pm pm(module, module->selected_cells());  				pm.run_xilinx_dsp_pack(xilinx_dsp_pack); +			} else if (family == "xc6s" || family == "xc3sda") { +				xilinx_dsp48a_pm pm(module, module->selected_cells()); +				pm.run_xilinx_dsp48a_pack(xilinx_dsp48a_pack);  			}  			// Separating out CREG packing is necessary since there  			//   is no guarantee that the cell ordering corresponds diff --git a/passes/pmgen/xilinx_dsp48a.pmg b/passes/pmgen/xilinx_dsp48a.pmg new file mode 100644 index 000000000..16f5e598d --- /dev/null +++ b/passes/pmgen/xilinx_dsp48a.pmg @@ -0,0 +1,673 @@ +// This file describes the main pattern matcher setup (of three total) that +//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc - version for +//   DSP48A/DSP48A1 (Spartan 3A DSP, Spartan 6). +// At a high level, it works as follows: +//   ( 1) Starting from a DSP48A/DSP48A1 cell +//   ( 2) Match the driver of the 'B' input to a possible $dff cell (B1REG) +//        (attached to at most two $mux cells that implement clock-enable or +//         reset functionality, using a subpattern discussed below) +//        If B1REG matched, treat 'B' input as input of B1REG +//   ( 3) Match the driver of the 'B' and 'D' inputs for a possible $add cell +//       (pre-adder) +//   ( 4) Match 'B' input for B0REG +//   ( 5) Match 'A' input for A1REG +//        If A1REG, then match 'A' input for A0REG +//   ( 6) Match 'D' input for DREG +//   ( 7) Match 'P' output that exclusively drives an MREG +//   ( 8) Match 'P' output that exclusively drives one of two inputs to an $add +//        cell (post-adder). +//        The other input to the adder is assumed to come in from the 'C' input +//        (note: 'P' -> 'C' connections that exist for accumulators are +//         recognised in xilinx_dsp.cc). 
+//   ( 9) Match 'P' output that exclusively drives a PREG +//   (10) If post-adder and PREG both present, match for a $mux cell driving +//        the 'C' input, where one of the $mux's inputs is the PREG output. +//        This indicates an accumulator situation, and one where a $mux exists +//        to override the accumulated value: +//             +--------------------------------+ +//             |   ____                         | +//             +--|    \                        | +//                |$mux|-+                      | +//         'C' ---|____/ |                      | +//                       | /-------\   +----+   | +//            +----+     +-| post- |___|PREG|---+ 'P' +//            |MREG|------ | adder |   +----+ +//            +----+       \-------/ +// Notes: see the notes in xilinx_dsp.pmg + +pattern xilinx_dsp48a_pack + +state <SigBit> clock +state <SigSpec> sigA sigB sigC sigD sigM sigP +state <IdString> postAddAB postAddMuxAB +state <bool> ffAcepol ffBcepol ffDcepol ffMcepol ffPcepol +state <bool> ffArstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol +state <Cell*> ffA0 ffA0cemux ffA0rstmux ffA1 ffA1cemux ffA1rstmux +state <Cell*> ffB0 ffB0cemux ffB0rstmux ffB1 ffB1cemux ffB1rstmux +state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux + +// Variables used for subpatterns +state <SigSpec> argQ argD +state <bool> ffcepol ffrstpol +state <int> ffoffset +udata <SigSpec> dffD dffQ +udata <SigBit> dffclock +udata <Cell*> dff dffcemux dffrstmux +udata <bool> dffcepol dffrstpol + +// (1) Starting from a DSP48A/DSP48A1 cell +match dsp +	select dsp->type.in(\DSP48A, \DSP48A1) +endmatch + +code sigA sigB sigC sigD sigM clock +	auto unextend = [](const SigSpec &sig) { +		int i; +		for (i = GetSize(sig)-1; i > 0; i--) +			if (sig[i] != sig[i-1]) +				break; +		// Do not remove non-const sign bit +		if (sig[i].wire) +			++i; +		return sig.extract(0, i); +	}; +	sigA = unextend(port(dsp, \A)); +	sigB = unextend(port(dsp, 
\B)); + +	sigC = port(dsp, \C, SigSpec()); +	sigD = port(dsp, \D, SigSpec()); + +	SigSpec P = port(dsp, \P); +	// Only care about those bits that are used +	int i; +	for (i = GetSize(P)-1; i >= 0; i--) +		if (nusers(P[i]) > 1) +			break; +	i++; +	log_assert(nusers(P.extract_end(i)) <= 1); +	// This sigM could have no users if downstream sinks (e.g. $add) is +	//   narrower than $mul result, for example +	if (i == 0) +		reject; +	sigM = P.extract(0, i); + +	clock = port(dsp, \CLK, SigBit()); +endcode + +// (2) Match the driver of the 'B' input to a possible $dff cell (B1REG) +//     (attached to at most two $mux cells that implement clock-enable or +//      reset functionality, using a subpattern discussed above) +//     If matched, treat 'B' input as input of B1REG +code argQ ffB1 ffB1cemux ffB1rstmux ffBcepol ffBrstpol sigB clock +	if (param(dsp, \B1REG).as_int() == 0 && param(dsp, \B0REG).as_int() == 0 && port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()) { +		argQ = sigB; +		subpattern(in_dffe); +		if (dff) { +			ffB1 = dff; +			clock = dffclock; +			if (dffrstmux) { +				ffB1rstmux = dffrstmux; +				ffBrstpol = dffrstpol; +			} +			if (dffcemux) { +				ffB1cemux = dffcemux; +				ffBcepol = dffcepol; +			} +			sigB = dffD; +		} +	} +endcode + +// (3) Match the driver of the 'B' and 'D' inputs for a possible $add cell +//     (pre-adder) +match preAdd +	if sigD.empty() || sigD.is_fully_zero() +	if param(dsp, \B0REG).as_int() == 0 +	// Ensure that preAdder not already used +	if port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero() + +	select preAdd->type.in($add, $sub) +	// Output has to be 18 bits or less +	select GetSize(port(preAdd, \Y)) <= 18 +	select nusers(port(preAdd, \Y)) == 2 +	// D port has to be 18 bits or less +	select GetSize(port(preAdd, \A)) <= 18 +	// B port has to be 18 bits or less +	select GetSize(port(preAdd, \B)) <= 18 +	index <SigSpec> port(preAdd, \Y) === sigB + +	optional +endmatch + +code sigB sigD +	if (preAdd) { +		sigD = 
port(preAdd, \A); +		sigB = port(preAdd, \B); +	} +endcode + +// (4) Match 'B' input for B0REG +code argQ ffB0 ffB0cemux ffB0rstmux ffBcepol ffBrstpol sigB clock +	if (param(dsp, \B0REG).as_int() == 0) { +		argQ = sigB; +		subpattern(in_dffe); +		if (dff) { +			if (ffB1) { +				if ((ffB1rstmux != nullptr) ^ (dffrstmux != nullptr)) +					goto ffB0_end; +				if ((ffB1cemux != nullptr) ^ (dffcemux != nullptr)) +					goto ffB0_end; +				if (dffrstmux) { +					if (ffBrstpol != dffrstpol) +						goto ffB0_end; +					if (port(ffB1rstmux, \S) != port(dffrstmux, \S)) +						goto ffB0_end; +					ffB0rstmux = dffrstmux; +				} +				if (dffcemux) { +					if (ffBcepol != dffcepol) +						goto ffB0_end; +					if (port(ffB1cemux, \S) != port(dffcemux, \S)) +						goto ffB0_end; +					ffB0cemux = dffcemux; +				} +			} +			ffB0 = dff; +			clock = dffclock; +			if (dffrstmux) { +				ffB0rstmux = dffrstmux; +				ffBrstpol = dffrstpol; +			} +			if (dffcemux) { +				ffB0cemux = dffcemux; +				ffBcepol = dffcepol; +			} +			sigB = dffD; +		} +	} +ffB0_end: +endcode + +// (5) Match 'A' input for A1REG +//     If A1REG, then match 'A' input for A0REG +code argQ ffA1 ffA1cemux ffA1rstmux ffAcepol ffArstpol sigA clock ffA0 ffA0cemux ffA0rstmux +	if (param(dsp, \A0REG).as_int() == 0 && param(dsp, \A1REG).as_int() == 0) { +		argQ = sigA; +		subpattern(in_dffe); +		if (dff) { +			ffA1 = dff; +			clock = dffclock; +			if (dffrstmux) { +				ffA1rstmux = dffrstmux; +				ffArstpol = dffrstpol; +			} +			if (dffcemux) { +				ffA1cemux = dffcemux; +				ffAcepol = dffcepol; +			} +			sigA = dffD; + +			// Now attempt to match A0 +			if (ffA1) { +				argQ = sigA; +				subpattern(in_dffe); +				if (dff) { +					if ((ffA1rstmux != nullptr) ^ (dffrstmux != nullptr)) +						goto ffA0_end; +					if ((ffA1cemux != nullptr) ^ (dffcemux != nullptr)) +						goto ffA0_end; +					if (dffrstmux) { +						if (ffArstpol != dffrstpol) +							goto ffA0_end; +						if (port(ffA1rstmux, \S) != port(dffrstmux, \S)) +	
						goto ffA0_end; +						ffA0rstmux = dffrstmux; +					} +					if (dffcemux) { +						if (ffAcepol != dffcepol) +							goto ffA0_end; +						if (port(ffA1cemux, \S) != port(dffcemux, \S)) +							goto ffA0_end; +						ffA0cemux = dffcemux; +					} + +					ffA0 = dff; +					clock = dffclock; + +					if (dffcemux) { +						ffA0cemux = dffcemux; +						ffAcepol = dffcepol; +					} +					sigA = dffD; + +ffA0_end:				; +				} +			} + +		} +	} +endcode + +// (6) Match 'D' input for DREG +code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock +	if (param(dsp, \DREG).as_int() == 0) { +		argQ = sigD; +		subpattern(in_dffe); +		if (dff) { +			ffD = dff; +			clock = dffclock; +			if (dffrstmux) { +				ffDrstmux = dffrstmux; +				ffDrstpol = dffrstpol; +			} +			if (dffcemux) { +				ffDcemux = dffcemux; +				ffDcepol = dffcepol; +			} +			sigD = dffD; +		} +	} +endcode + +// (7) Match 'P' output that exclusively drives an MREG +code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock +	if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { +		argD = sigM; +		subpattern(out_dffe); +		if (dff) { +			ffM = dff; +			clock = dffclock; +			if (dffrstmux) { +				ffMrstmux = dffrstmux; +				ffMrstpol = dffrstpol; +			} +			if (dffcemux) { +				ffMcemux = dffcemux; +				ffMcepol = dffcepol; +			} +			sigM = dffQ; +		} +	} +	sigP = sigM; +endcode + +// (8) Match 'P' output that exclusively drives one of two inputs to an $add +//     cell (post-adder). +//     The other input to the adder is assumed to come in from the 'C' input +//     (note: 'P' -> 'C' connections that exist for accumulators are +//      recognised in xilinx_dsp.cc). 
+match postAdd +	// Ensure that Z mux is not already used +	if port(dsp, \OPMODE, SigSpec()).extract(2,2).is_fully_zero() + +	select postAdd->type.in($add) +	select GetSize(port(postAdd, \Y)) <= 48 +	choice <IdString> AB {\A, \B} +	select nusers(port(postAdd, AB)) <= 3 +	filter ffMcemux || nusers(port(postAdd, AB)) == 2 +	filter !ffMcemux || nusers(port(postAdd, AB)) == 3 + +	index <SigBit> port(postAdd, AB)[0] === sigP[0] +	filter GetSize(port(postAdd, AB)) >= GetSize(sigP) +	filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP +	// Check that remainder of AB is a sign- or zero-extension +	filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP)) + +	set postAddAB AB +	optional +endmatch + +code sigC sigP +	if (postAdd) { +		sigC = port(postAdd, postAddAB == \A ? \B : \A); +		sigP = port(postAdd, \Y); +	} +endcode + +// (9) Match 'P' output that exclusively drives a PREG +code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock +	if (param(dsp, \PREG).as_int() == 0) { +		int users = 2; +		// If ffMcemux and no postAdd new-value net must have three users: ffMcemux, ffM and ffPcemux +		if (ffMcemux && !postAdd) users++; +		if (nusers(sigP) == users) { +			argD = sigP; +			subpattern(out_dffe); +			if (dff) { +				ffP = dff; +				clock = dffclock; +				if (dffrstmux) { +					ffPrstmux = dffrstmux; +					ffPrstpol = dffrstpol; +				} +				if (dffcemux) { +					ffPcemux = dffcemux; +					ffPcepol = dffcepol; +				} +				sigP = dffQ; +			} +		} +	} +endcode + +// (10) If post-adder and PREG both present, match for a $mux cell driving +//      the 'C' input, where one of the $mux's inputs is the PREG output. 
+//      This indicates an accumulator situation, and one where a $mux exists +//      to override the accumulated value: +//           +--------------------------------+ +//           |   ____                         | +//           +--|    \                        | +//              |$mux|-+                      | +//       'C' ---|____/ |                      | +//                     | /-------\   +----+   | +//          +----+     +-| post- |___|PREG|---+ 'P' +//          |MREG|------ | adder |   +----+ +//          +----+       \-------/ +match postAddMux +	if postAdd +	if ffP +	select postAddMux->type.in($mux) +	select nusers(port(postAddMux, \Y)) == 2 +	choice <IdString> AB {\A, \B} +	index <SigSpec> port(postAddMux, AB) === sigP +	index <SigSpec> port(postAddMux, \Y) === sigC +	set postAddMuxAB AB +	optional +endmatch + +code sigC +	if (postAddMux) +		sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); +endcode + +code +	accept; +endcode + +// ####################### + +// Subpattern for matching against input registers, based on knowledge of the +//   'Q' input. Typically, identifying registers with clock-enable and reset +//   capability would be a task handled by other Yosys passes such as +//   dff2dffe, but since DSP inference happens much before this, these patterns +//   have to be manually identified. 
+// At a high level: +//   (1) Starting from a $dff cell that (partially or fully) drives the given +//       'Q' argument +//   (2) Match for a $mux cell implementing synchronous reset semantics --- +//       one that exclusively drives the 'D' input of the $dff, with one of its +//       $mux inputs being fully zero +//   (3) Match for a $mux cell implementing clock enable semantics --- one that +//       exclusively drives the 'D' input of the $dff (or the other input of +//       the reset $mux) and where one of this $mux's inputs is connected to +//       the 'Q' output of the $dff +subpattern in_dffe +arg argD argQ clock + +code +	dff = nullptr; +	if (GetSize(argQ) == 0) +		reject; +	for (const auto &c : argQ.chunks()) { +		// Abandon matches when 'Q' is a constant +		if (!c.wire) +			reject; +		// Abandon matches when 'Q' has the keep attribute set +		if (c.wire->get_bool_attribute(\keep)) +			reject; +		// Abandon matches when 'Q' has a non-zero init attribute set +		// (not supported by DSP48E1) +		Const init = c.wire->attributes.at(\init, Const()); +		if (!init.empty()) +			for (auto b : init.extract(c.offset, c.width)) +				if (b != State::Sx && b != State::S0) +					reject; +	} +endcode + +// (1) Starting from a $dff cell that (partially or fully) drives the given +//     'Q' argument +match ff +	select ff->type.in($dff) +	// DSP48E1 does not support clock inversion +	select param(ff, \CLK_POLARITY).as_bool() + +	slice offset GetSize(port(ff, \D)) +	index <SigBit> port(ff, \Q)[offset] === argQ[0] + +	// Check that the rest of argQ is present +	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ) +	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + +	filter clock == SigBit() || port(ff, \CLK) == clock + +	set ffoffset offset +endmatch + +code argQ argD +	SigSpec Q = port(ff, \Q); +	dff = ff; +	dffclock = port(ff, \CLK); +	dffD = argQ; +	argD = port(ff, \D); +	argQ = Q; +	dffD.replace(argQ, argD); +	// Only search for ffrstmux if dffD only +	//   
has two (ff, ffrstmux) users +	if (nusers(dffD) > 2) +		argD = SigSpec(); +endcode + +// (2) Match for a $mux cell implementing synchronous reset semantics --- +//     exclusively drives the 'D' input of the $dff, with one of the $mux +//     inputs being fully zero +match ffrstmux +	if !argD.empty() +	select ffrstmux->type.in($mux) +	index <SigSpec> port(ffrstmux, \Y) === argD + +	choice <IdString> BA {\B, \A} +	// DSP48E1 only supports reset to zero +	select port(ffrstmux, BA).is_fully_zero() + +	define <bool> pol (BA == \B) +	set ffrstpol pol +	semioptional +endmatch + +code argD +	if (ffrstmux) { +		dffrstmux = ffrstmux; +		dffrstpol = ffrstpol; +		argD = port(ffrstmux, ffrstpol ? \A : \B); +		dffD.replace(port(ffrstmux, \Y), argD); + +		// Only search for ffcemux if argQ has at +		//   least 3 users (ff, <upstream>, ffrstmux) and +		//   dffD only has two (ff, ffrstmux) +		if (!(nusers(argQ) >= 3 && nusers(dffD) == 2)) +			argD = SigSpec(); +	} +	else +		dffrstmux = nullptr; +endcode + +// (3) Match for a $mux cell implementing clock enable semantics --- one that +//     exclusively drives the 'D' input of the $dff (or the other input of +//     the reset $mux) and where one of this $mux's inputs is connected to +//     the 'Q' output of the $dff +match ffcemux +	if !argD.empty() +	select ffcemux->type.in($mux) +	index <SigSpec> port(ffcemux, \Y) === argD +	choice <IdString> AB {\A, \B} +	index <SigSpec> port(ffcemux, AB) === argQ +	define <bool> pol (AB == \A) +	set ffcepol pol +	semioptional +endmatch + +code argD +	if (ffcemux) { +		dffcemux = ffcemux; +		dffcepol = ffcepol; +		argD = port(ffcemux, ffcepol ? \B : \A); +		dffD.replace(port(ffcemux, \Y), argD); +	} +	else +		dffcemux = nullptr; +endcode + +// ####################### + +// Subpattern for matching against output registers, based on knowledge of the +//   'D' input. 
+// At a high level: +//   (1) Starting from an optional $mux cell that implements clock enable +//       semantics --- one where the given 'D' argument (partially or fully) +//       drives one of its two inputs +//   (2) Starting from, or continuing onto, another optional $mux cell that +//       implements synchronous reset semantics --- one where the given 'D' +//       argument (or the clock enable $mux output) drives one of its two inputs +//       and where the other input is fully zero +//   (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +//       output of the previous clock enable or reset $mux cells) +subpattern out_dffe +arg argD argQ clock + +code +	dff = nullptr; +	for (auto c : argD.chunks()) +		// Abandon matches when 'D' has the keep attribute set +		if (c.wire->get_bool_attribute(\keep)) +			reject; +endcode + +// (1) Starting from an optional $mux cell that implements clock enable +//     semantics --- one where the given 'D' argument (partially or fully) +//     drives one of its two inputs +match ffcemux +	select ffcemux->type.in($mux) +	// ffcemux output must have two users: ffcemux and ff.D +	select nusers(port(ffcemux, \Y)) == 2 + +	choice <IdString> AB {\A, \B} +	// keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s) +	select nusers(port(ffcemux, AB)) >= 3 + +	slice offset GetSize(port(ffcemux, \Y)) +	define <IdString> BA (AB == \A ? \B : \A) +	index <SigBit> port(ffcemux, BA)[offset] === argD[0] + +	// Check that the rest of argD is present +	filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD) +	filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD + +	set ffoffset offset +	define <bool> pol (AB == \A) +	set ffcepol pol + +	semioptional +endmatch + +code argD argQ +	dffcemux = ffcemux; +	if (ffcemux) { +		SigSpec BA = port(ffcemux, ffcepol ? \B : \A); +		SigSpec Y = port(ffcemux, \Y); +		argQ = argD; +		argD.replace(BA, Y); +		argQ.replace(BA, port(ffcemux, ffcepol ? 
\A : \B)); + +		dffcemux = ffcemux; +		dffcepol = ffcepol; +	} +endcode + +// (2) Starting from, or continuing onto, another optional $mux cell that +//     implements synchronous reset semantics --- one where the given 'D' +//     argument (or the clock enable $mux output) drives one of its two inputs +//     and where the other input is fully zero +match ffrstmux +	select ffrstmux->type.in($mux) +	// ffrstmux output must have two users: ffrstmux and ff.D +	select nusers(port(ffrstmux, \Y)) == 2 + +	choice <IdString> BA {\B, \A} +	// DSP48E1 only supports reset to zero +	select port(ffrstmux, BA).is_fully_zero() + +	slice offset GetSize(port(ffrstmux, \Y)) +	define <IdString> AB (BA == \B ? \A : \B) +	index <SigBit> port(ffrstmux, AB)[offset] === argD[0] + +	// Check that offset is consistent +	filter !ffcemux || ffoffset == offset +	// Check that the rest of argD is present +	filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD) +	filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD + +	set ffoffset offset +	define <bool> pol (AB == \A) +	set ffrstpol pol + +	semioptional +endmatch + +code argD argQ +	dffrstmux = ffrstmux; +	if (ffrstmux) { +		SigSpec AB = port(ffrstmux, ffrstpol ? 
\A : \B); +		SigSpec Y = port(ffrstmux, \Y); +		argD.replace(AB, Y); + +		dffrstmux = ffrstmux; +		dffrstpol = ffrstpol; +	} +endcode + +// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the +//     output of the previous clock enable or reset $mux cells) +match ff +	select ff->type.in($dff) +	// DSP48E1 does not support clock inversion +	select param(ff, \CLK_POLARITY).as_bool() + +	slice offset GetSize(port(ff, \D)) +	index <SigBit> port(ff, \D)[offset] === argD[0] + +	// Check that offset is consistent +	filter (!ffcemux && !ffrstmux) || ffoffset == offset +	// Check that the rest of argD is present +	filter GetSize(port(ff, \D)) >= offset + GetSize(argD) +	filter port(ff, \D).extract(offset, GetSize(argD)) == argD +	// Check that FF.Q is connected to CE-mux +	filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ + +	filter clock == SigBit() || port(ff, \CLK) == clock + +	set ffoffset offset +endmatch + +code argQ +	SigSpec D = port(ff, \D); +	SigSpec Q = port(ff, \Q); +	if (!ffcemux) { +		argQ = argD; +		argQ.replace(D, Q); +	} + +	// Abandon matches when 'Q' has a non-zero init attribute set +	// (not supported by DSP48E1) +	for (auto c : argQ.chunks()) { +		Const init = c.wire->attributes.at(\init, Const()); +		if (!init.empty()) +			for (auto b : init.extract(c.offset, c.width)) +				if (b != State::Sx && b != State::S0) +					reject; +	} + +	dff = ff; +	dffQ = argQ; +	dffclock = port(ff, \CLK); +endcode diff --git a/passes/pmgen/xilinx_dsp_CREG.pmg b/passes/pmgen/xilinx_dsp_CREG.pmg index 5cd34162e..b20e4f458 100644 --- a/passes/pmgen/xilinx_dsp_CREG.pmg +++ b/passes/pmgen/xilinx_dsp_CREG.pmg @@ -1,7 +1,7 @@  // This file describes the second of three pattern matcher setups that  //   forms the `xilinx_dsp` pass described in xilinx_dsp.cc  // At a high level, it works as follows: -//   (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +//   (1) Starting from a DSP48* cell that (a) doesn't have a CREG 
already,  //       and (b) uses the 'C' port  //   (2) Match the driver of the 'C' input to a possible $dff cell (CREG)  //       (attached to at most two $mux cells that implement clock-enable or @@ -38,10 +38,10 @@ udata <SigBit> dffclock  udata <Cell*> dff dffcemux dffrstmux  udata <bool> dffcepol dffrstpol -// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already, +// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already,  //     and (b) uses the 'C' port  match dsp -	select dsp->type.in(\DSP48E1) +	select dsp->type.in(\DSP48A, \DSP48A1, \DSP48E1)  	select param(dsp, \CREG, 1).as_int() == 0  	select nusers(port(dsp, \C, SigSpec())) > 1  endmatch @@ -60,7 +60,8 @@ code sigC sigP clock  	sigC = unextend(port(dsp, \C, SigSpec()));  	SigSpec P = port(dsp, \P); -	if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") { +	if (!dsp->type.in(\DSP48E1) || + param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") {  		// Only care about those bits that are used  		int i;  		for (i = GetSize(P)-1; i >= 0; i--) diff --git a/passes/pmgen/xilinx_dsp_cascade.pmg b/passes/pmgen/xilinx_dsp_cascade.pmg index 7a32df2b7..b14a1ee0a 100644 --- a/passes/pmgen/xilinx_dsp_cascade.pmg +++ b/passes/pmgen/xilinx_dsp_cascade.pmg @@ -62,12 +62,11 @@ code  #define MAX_DSP_CASCADE 20  endcode -// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer -//     (controlled by OPMODE[6:4]) set to zero and (b) doesn't already -//     use the 'PCOUT' port +// (1) Starting from a DSP48* cell that (a) has the Z multiplexer +//     (controlled by OPMODE[3:2] for DSP48A*, by OPMODE[6:4] for DSP48E1) +//     set to zero and (b) doesn't already use the 'PCOUT' port  match first -	select first->type.in(\DSP48E1) -	select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000") +	select (first->type.in(\DSP48A, \DSP48A1) && port(first, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("00")) || 
(first->type.in(\DSP48E1) && port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000"))  	select nusers(port(first, \PCOUT, SigSpec())) <= 1  endmatch @@ -100,14 +99,21 @@ finally  					add_siguser(cascade, dsp);  					SigSpec opmode = port(dsp_pcin, \OPMODE, Const(0, 7)); -					if (P == 17) -						opmode[6] = State::S1; -					else if (P == 0) -						opmode[6] = State::S0; -					else log_abort(); - -					opmode[5] = State::S0; -					opmode[4] = State::S1; +					if (dsp->type.in(\DSP48A, \DSP48A1)) { +						log_assert(P == 0); +						opmode[3] = State::S0; +						opmode[2] = State::S1; +					} +					else if (dsp->type.in(\DSP48E1)) { +						if (P == 17) +							opmode[6] = State::S1; +						else if (P == 0) +							opmode[6] = State::S0; +						else log_abort(); + +						opmode[5] = State::S0; +						opmode[4] = State::S1; +					}  					dsp_pcin->setPort(\OPMODE, opmode);  					log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin)); @@ -120,21 +126,42 @@ finally  					add_siguser(cascade, dsp_pcin);  					add_siguser(cascade, dsp); -					dsp->setParam(ID(ACASCREG), AREG); +					if (dsp->type.in(\DSP48E1)) +						dsp->setParam(ID(ACASCREG), AREG);  					dsp_pcin->setParam(ID(A_INPUT), Const("CASCADE"));  					log_debug("ACOUT -> ACIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));  				}  				if (BREG >= 0) {  					Wire *cascade = module->addWire(NEW_ID, 18); -					dsp_pcin->setPort(ID(B), Const(0, 18)); -					dsp_pcin->setPort(ID(BCIN), cascade); +					if (dsp->type.in(\DSP48A, \DSP48A1)) { +						// According to UG389 p9 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf] +						// "The DSP48A1 component uses this input when cascading +						//   BCOUT from an adjacent DSP48A1 slice. The tools then +						//   translate BCOUT cascading to the dedicated BCIN input +						//   and set the B_INPUT attribute for implementation." 
+						dsp_pcin->setPort(ID(B), cascade); +					} +					else { +						dsp_pcin->setPort(ID(B), Const(0, 18)); +						dsp_pcin->setPort(ID(BCIN), cascade); +					}  					dsp->setPort(ID(BCOUT), cascade);  					add_siguser(cascade, dsp_pcin);  					add_siguser(cascade, dsp); -					dsp->setParam(ID(BCASCREG), BREG); -					dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE")); +					if (dsp->type.in(\DSP48E1)) { +						dsp->setParam(ID(BCASCREG), BREG); +						// According to UG389 p13 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf] +						// "The attribute is only used by place and route tools and +						//   is not necessary for the users to set for synthesis. The +						//   attribute is determined by the connection to the B port +						//   of the DSP48A1 slice. If the B port is connected to the +						//   BCOUT of another DSP48A1 slice, then the tools automatically +						//   set the attribute to 'CASCADE', otherwise it is set to +						//   'DIRECT'". +						dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE")); +					}  					log_debug("BCOUT -> BCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));  				} @@ -156,22 +183,21 @@ subpattern tail  arg first  arg next -// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled, +// (2.1) Match another DSP48* cell that (a) does not have the CREG enabled,  //       (b) has its Z multiplexer output set to the 'C' port, which is  //       driven by the 'P' output of the previous DSP cell, and (c) has its  //       'PCIN' port unused  match nextP -	select nextP->type.in(\DSP48E1)  	select !param(nextP, \CREG, State::S1).as_bool() -	select port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011") +	select (nextP->type.in(\DSP48A, \DSP48A1) && port(nextP, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("11")) || (nextP->type.in(\DSP48E1) && port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011"))  	select nusers(port(nextP, \C, SigSpec())) 
> 1  	select nusers(port(nextP, \PCIN, SigSpec())) == 0  	index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0]  	semioptional  endmatch -// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the -//       previous DSP cell right-shifted by 17 bits +// (2.2) For DSP48E1 only, same as (2.1) but with the 'C' port driven +//       by the 'P' output of the previous DSP cell right-shifted by 17 bits  match nextP_shift17  	if !nextP  	select nextP_shift17->type.in(\DSP48E1) @@ -188,6 +214,8 @@ code next  	if (!nextP)  		next = nextP_shift17;  	if (next) { +		if (next->type != first->type) +			reject;  		unextend = [](const SigSpec &sig) {  			int i;  			for (i = GetSize(sig)-1; i > 0; i--) @@ -202,38 +230,50 @@ code next  endcode  // (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists) -//     if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this -//     DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already -//     have an ACOUT -> ACIN cascade, (d) the previous DSP does not already -//     use its ACOUT port, then examine if an ACOUT -> ACIN cascade -//     opportunity exists by matching for a $dff-with-optional-clock-enable- -//     or-reset and checking that the 'D' input of this register is the same -//     as the 'A' input of the previous DSP +//     if (a) this DSP48E1 does not already have an ACOUT -> ACIN cascade, +//     (b) the previous DSP does  not already use its ACOUT port, then +//     examine if an ACOUT -> ACIN cascade  opportunity exists if +//     (i) A ports are identical, or (ii) separated by a +//     $dff-with-optional-clock-enable-or-reset and checking that the 'D' input +//     of this register is the same as the 'A' input of the previous DSP +//     TODO: Check for two levels of flops, instead of just one  code argQ clock AREG  	AREG = -1; -	if (next) { +	if (next && next->type.in(\DSP48E1)) {  		Cell *prev = std::get<0>(chain.back()); -		if (param(prev, 
\AREG, 2).as_int() > 0 && -				param(next, \AREG, 2).as_int() > 0 && -				param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && + +		if (param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" && +				port(next, \ACIN, SigSpec()).is_fully_zero() &&  				nusers(port(prev, \ACOUT, SigSpec())) <= 1) { -			argQ = unextend(port(next, \A)); -			clock = port(prev, \CLK); -			subpattern(in_dffe); -			if (dff) { -				if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0) -					goto reject_AREG; -				if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) -					goto reject_AREG; -				if (!dffcemux && port(prev, \CEA2, State::S0) != State::S0) -					goto reject_AREG; -				if (dffcemux && port(dffcemux, \S) != port(prev, \CEA2, State::S0)) -					goto reject_AREG; -				if (dffD == unextend(port(prev, \A))) -					AREG = 1; -reject_AREG:			; +			if (param(prev, \AREG, 2) == 0) { +				if (port(prev, \A) == port(next, \A)) +					AREG = 0; +			} +			else { +				argQ = unextend(port(next, \A)); +				clock = port(prev, \CLK); +				subpattern(in_dffe); +				if (dff) { +					if (!dffrstmux && port(prev, \RSTA, State::S0) != State::S0) +						goto reject_AREG; +					if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTA, State::S0)) +						goto reject_AREG; +					IdString CEA; +					if (param(prev, \AREG, 2) == 1) +						CEA = \CEA2; +					else if (param(prev, \AREG, 2) == 2) +						CEA = \CEA1; +					else log_abort(); +					if (!dffcemux && port(prev, CEA, State::S0) != State::S1) +						goto reject_AREG; +					if (dffcemux && port(dffcemux, \S) != port(prev, CEA, State::S0)) +						goto reject_AREG; +					if (dffD == unextend(port(prev, \A))) +						AREG = 1; +				}  			}  		} +reject_AREG:	;  	}  endcode @@ -242,28 +282,47 @@ code argQ clock BREG  	BREG = -1;  	if (next) {  		Cell *prev = std::get<0>(chain.back()); -		if (param(prev, \BREG, 2).as_int() > 0 && -				param(next, \BREG, 2).as_int() > 0 && -				param(next, \B_INPUT, 
Const("DIRECT")).decode_string() == "DIRECT" && +		if (param(next, \B_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&  				port(next, \BCIN, SigSpec()).is_fully_zero() &&  				nusers(port(prev, \BCOUT, SigSpec())) <= 1) { -			argQ = unextend(port(next, \B)); -			clock = port(prev, \CLK); -			subpattern(in_dffe); -			if (dff) { -				if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0) -					goto reject_BREG; -				if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) -					goto reject_BREG; -				if (!dffcemux && port(prev, \CEB2, State::S0) != State::S0) -					goto reject_BREG; -				if (dffcemux && port(dffcemux, \S) != port(prev, \CEB2, State::S0)) -					goto reject_BREG; -				if (dffD == unextend(port(prev, \B))) -					BREG = 1; -reject_BREG:			; +			if ((next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG, 0) == 0 && param(prev, \B1REG, 1) == 0) || +				(next->type.in(\DSP48E1) && param(prev, \BREG, 2) == 0)) { +				if (port(prev, \B) == port(next, \B)) +					BREG = 0; +			} +			else { +				argQ = unextend(port(next, \B)); +				clock = port(prev, \CLK); +				subpattern(in_dffe); +				if (dff) { +					if (!dffrstmux && port(prev, \RSTB, State::S0) != State::S0) +						goto reject_BREG; +					if (dffrstmux && port(dffrstmux, \S) != port(prev, \RSTB, State::S0)) +						goto reject_BREG; +					IdString CEB; +					if (next->type.in(\DSP48A, \DSP48A1)) +						CEB = \CEB; +					else if (next->type.in(\DSP48E1)) { +						if (param(prev, \BREG, 2) == 1) +							CEB = \CEB2; +						else if (param(prev, \BREG, 2) == 2) +							CEB = \CEB1; +						else log_abort(); +					} +					else log_abort(); +					if (!dffcemux && port(prev, CEB, State::S0) != State::S1) +						goto reject_BREG; +					if (dffcemux && port(dffcemux, \S) != port(prev, CEB, State::S0)) +						goto reject_BREG; +					if (dffD == unextend(port(prev, \B))) { +						if (next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG, 0) != 0) +							goto reject_BREG; +						BREG = 1; +	
				} +				}  			}  		} +reject_BREG:	;  	}  endcode | 
