/* * yosys -- Yosys Open SYnthesis Suite * * Copyright (C) 2019-2020 whitequark * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * */ // This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself. #ifndef CXXRTL_H #define CXXRTL_H #include #include #include #include #include #include #include #include #include #include #include // The cxxrtl support library implements compile time specialized arbitrary width arithmetics, as well as provides // composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass // to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler // to unwrap the abstraction and generate efficient code. namespace cxxrtl { // All arbitrary-width values in cxxrtl are backed by arrays of unsigned integers called chunks. The chunk size // is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving // and introspecting the simulation in Python. // // It is practical to use chunk sizes between 32 bits and platform register size because when arithmetics on // narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register. // However, (a) most of our operations do not change those bits in the first place because of invariants that are // invisible to the compiler, (b) we often operate on non-power-of-2 values and have to clear the high bits anyway. // Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be // clobbered results in simpler generated code. template struct chunk_traits { static_assert(std::is_integral::value && std::is_unsigned::value, "chunk type must be an unsigned integral type"); using type = T; static constexpr size_t bits = std::numeric_limits::digits; static constexpr T mask = std::numeric_limits::max(); }; template struct expr_base; template struct value : public expr_base> { static constexpr size_t bits = Bits; using chunk = chunk_traits; static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask : chunk::mask >> (chunk::bits - (Bits % chunk::bits)); static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits; chunk::type data[chunks] = {}; value() = default; template explicit constexpr value(Init ...init) : data{init...} {} value(const value &) = default; value(value &&) = default; value &operator=(const value &) = default; // A (no-op) helper that forces the cast to value<>. const value &val() const { return *this; } std::string str() const { std::stringstream ss; ss << *this; return ss.str(); } // Operations with compile-time parameters. // // These operations are used to implement slicing, concatenation, and blitting. // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left); // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right). template value trunc() const { static_assert(NewBits <= Bits, "trunc() may not increase width"); value result; for (size_t n = 0; n < result.chunks; n++) result.data[n] = data[n]; result.data[result.chunks - 1] &= result.msb_mask; return result; } template value zext() const { static_assert(NewBits >= Bits, "zext() may not decrease width"); value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n]; return result; } template value sext() const { static_assert(NewBits >= Bits, "sext() may not decrease width"); value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n]; if (is_neg()) { result.data[chunks - 1] |= ~msb_mask; for (size_t n = chunks; n < result.chunks; n++) result.data[n] = chunk::mask; result.data[result.chunks - 1] &= result.msb_mask; } return result; } template value rtrunc() const { static_assert(NewBits <= Bits, "rtrunc() may not increase width"); value result; constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits; constexpr size_t shift_bits = (Bits - NewBits) % chunk::bits; chunk::type carry = 0; if (shift_chunks + result.chunks < chunks) { carry = (shift_bits == 0) ? 0 : data[shift_chunks + result.chunks] << (chunk::bits - shift_bits); } for (size_t n = result.chunks; n > 0; n--) { result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits); carry = (shift_bits == 0) ? 0 : data[shift_chunks + n - 1] << (chunk::bits - shift_bits); } return result; } template value rzext() const { static_assert(NewBits >= Bits, "rzext() may not decrease width"); value result; constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits; constexpr size_t shift_bits = (NewBits - Bits) % chunk::bits; chunk::type carry = 0; for (size_t n = 0; n < chunks; n++) { result.data[shift_chunks + n] = (data[n] << shift_bits) | carry; carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } if (carry != 0) result.data[result.chunks - 1] = carry; return result; } // Bit blit operation, i.e. a partial read-modify-write. template value blit(const value &source) const { static_assert(Stop >= Start, "blit() may not reverse bit order"); constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits)); constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0 : (chunk::mask << (Stop % chunk::bits + 1)); value masked = *this; if (Start / chunk::bits == Stop / chunk::bits) { masked.data[Start / chunk::bits] &= stop_mask | start_mask; } else { masked.data[Start / chunk::bits] &= start_mask; for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++) masked.data[n] = 0; masked.data[Stop / chunk::bits] &= stop_mask; } value shifted = source .template rzext() .template zext(); return masked.bit_or(shifted); } // Helpers for selecting extending or truncating operation depending on whether the result is wider or narrower // than the operand. In C++17 these can be replaced with `if constexpr`. template struct zext_cast { value operator()(const value &val) { return val.template zext(); } }; template struct zext_cast::type> { value operator()(const value &val) { return val.template trunc(); } }; template struct sext_cast { value operator()(const value &val) { return val.template sext(); } }; template struct sext_cast::type> { value operator()(const value &val) { return val.template trunc(); } }; template value zcast() const { return zext_cast()(*this); } template value scast() const { return sext_cast()(*this); } // Operations with run-time parameters (offsets, amounts, etc). // // These operations are used for computations. bool bit(size_t offset) const { return data[offset / chunk::bits] & (1 << (offset % chunk::bits)); } void set_bit(size_t offset, bool value = true) { size_t offset_chunks = offset / chunk::bits; size_t offset_bits = offset % chunk::bits; data[offset_chunks] &= ~(1 << offset_bits); data[offset_chunks] |= value ? 1 << offset_bits : 0; } bool is_zero() const { for (size_t n = 0; n < chunks; n++) if (data[n] != 0) return false; return true; } explicit operator bool() const { return !is_zero(); } bool is_neg() const { return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits)); } bool operator ==(const value &other) const { for (size_t n = 0; n < chunks; n++) if (data[n] != other.data[n]) return false; return true; } bool operator !=(const value &other) const { return !(*this == other); } value bit_not() const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = ~data[n]; result.data[chunks - 1] &= msb_mask; return result; } value bit_and(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] & other.data[n]; return result; } value bit_or(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] | other.data[n]; return result; } value bit_xor(const value &other) const { value result; for (size_t n = 0; n < chunks; n++) result.data[n] = data[n] ^ other.data[n]; return result; } value update(const value &val, const value &mask) const { return bit_and(mask.bit_not()).bit_or(val.bit_and(mask)); } template value shl(const value &amount) const { // Ensure our early return is correct by prohibiting values larger than 4 Gbit. static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported"); // Detect shifts definitely large than Bits early. for (size_t n = 1; n < amount.chunks; n++) if (amount.data[n] != 0) return {}; // Past this point we can use the least significant chunk as the shift size. size_t shift_chunks = amount.data[0] / chunk::bits; size_t shift_bits = amount.data[0] % chunk::bits; if (shift_chunks >= chunks) return {}; value result; chunk::type carry = 0; for (size_t n = 0; n < chunks - shift_chunks; n++) { result.data[shift_chunks + n] = (data[n] << shift_bits) | carry; carry = (shift_bits == 0) ? 0 : data[n] >> (chunk::bits - shift_bits); } return result; } template value shr(const value &amount) const { // Ensure our early return is correct by prohibiting values larger than 4 Gbit. static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported"); // Detect shifts definitely large than Bits early. for (size_t n = 1; n < amount.chunks; n++) if (amount.data[n] != 0) return {}; // Past this point we can use the least significant chunk as the shift size. size_t shift_chunks = amount.data[0] / chunk::bits; size_t shift_bits = amount.data[0] % chunk::bits; if (shift_chunks >= chunks) return {}; value result; chunk::type carry = 0; for (size_t n = 0; n < chunks - shift_chunks; n++) { result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits); carry = (shift_bits == 0) ? 0 : data[chunks - 1 - n] << (chunk::bits - shift_bits); } if (Signed && is_neg()) { for (size_t n = chunks - shift_chunks; n < chunks; n++) result.data[n] = chunk::mask; if (shift_bits != 0) result.data[chunks - shift_chunks] |= chunk::mask << (chunk::bits - shift_bits); } return result; } template value sshr(const value &amount) const { return shr(amount); } size_t ctpop() const { size_t count = 0; for (size_t n = 0; n < chunks; n++) { // This loop implements the population count idiom as recognized by LLVM and GCC. for (chunk::type x = data[n]; x != 0; count++) x = x & (x - 1); } return count; } size_t ctlz() const { size_t count = 0; for (size_t n = 0; n < chunks; n++) { chunk::type x = data[chunks - 1 - n]; if (x == 0) { count += (n == 0 ? Bits % chunk::bits : chunk::bits); } else { // This loop implements the find first set idiom as recognized by LLVM. for (; x != 0; count++) x >>= 1; } } return count; } template std::pair, bool /*CarryOut*/> alu(const value &other) const { value result; bool carry = CarryIn; for (size_t n = 0; n < result.chunks; n++) { result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry; carry = (result.data[n] < data[n]) || (result.data[n] == data[n] && carry); } result.data[result.chunks - 1] &= result.msb_mask; return {result, carry}; } value add(const value &other) const { return alu(other).first; } value sub(const value &other) const { return alu(other).first; } value neg() const { return value { 0u }.sub(*this); } bool ucmp(const value &other) const { bool carry; std::tie(std::ignore, carry) = alu(other); return !carry; // a.ucmp(b) ≡ a u< b } bool scmp(const value &other) const { value result; bool carry; std::tie(result, carry) = alu(other); bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg()); return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b } }; // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here. template struct slice_expr : public expr_base> { static_assert(Stop >= Start, "slice_expr() may not reverse bit order"); static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds"); static constexpr size_t bits = Stop - Start + 1; T &expr; slice_expr(T &expr) : expr(expr) {} slice_expr(const slice_expr &) = delete; operator value() const { return static_cast &>(expr) .template rtrunc() .template trunc(); } slice_expr &operator=(const value &rhs) { // Generic partial assignment implemented using a read-modify-write operation on the sliced expression. expr = static_cast &>(expr) .template blit(rhs); return *this; } // A helper that forces the cast to value<>, which allows deduction to work. value val() const { return static_cast &>(*this); } }; // Expression template for a concatenation, usable as lvalue or rvalue, and composable with other expression templates here. template struct concat_expr : public expr_base> { static constexpr size_t bits = T::bits + U::bits; T &ms_expr; U &ls_expr; concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {} concat_expr(const concat_expr &) = delete; operator value() const { value ms_shifted = static_cast &>(ms_expr) .template rzext(); value ls_extended = static_cast &>(ls_expr) .template zext(); return ms_shifted.bit_or(ls_extended); } concat_expr &operator=(const value &rhs) { ms_expr = rhs.template rtrunc(); ls_expr = rhs.template trunc(); return *this; } // A helper that forces the cast to value<>, which allows deduction to work. value val() const { return static_cast &>(*this); } }; // Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues. // // Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because // they refer to temporaries that will, in general, only live until the end of the statement. For example, both of // these snippets perform use-after-free: // // const auto &a = val.slice<7,0>().slice<1>(); // value<1> b = a; // // auto &&c = val.slice<7,0>().slice<1>(); // c = value<1>{1u}; // // An easy way to write code using slices and concatenations safely is to follow two simple rules: // * Never explicitly name any type except `value` or `const value &`. // * Never use a `const auto &` or `auto &&` in any such expression. // Then, any code that compiles will be well-defined. template struct expr_base { template slice_expr slice() const { return {*static_cast(this)}; } template slice_expr slice() { return {*static_cast(this)}; } template concat_expr::type> concat(const U &other) const { return {*static_cast(this), other}; } template concat_expr::type> concat(U &&other) { return {*static_cast(this), other}; } }; template std::ostream &operator<<(std::ostream &os, const value &val) { auto old_flags = os.flags(std::ios::right); auto old_width = os.width(0); auto old_fill = os.fill('0'); os << val.bits << '\'' << std::hex; for (size_t n = val.chunks - 1; n != (size_t)-1; n--) { if (n == val.chunks - 1 && Bits % value::chunk::bits != 0) os.width((Bits % value::chunk::bits + 3) / 4); else os.width((value::chunk::bits + 3) / 4); os << val.data[n]; } os.fill(old_fill); os.width(old_width); os.flags(old_flags); return os; } template struct wire { static constexpr size_t bits = Bits; value curr; value next; wire() = default; constexpr wire(const value &init) : curr(init), next(init) {} template explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {} wire(const wire &) = delete; wire(wire &&) = default; wire &operator=(const wire &) = delete; bool commit() { if (curr != next) { curr = next; return true; } return false; } }; template std::ostream &operator<<(std::ostream &os, const wire &val) { os << val.curr; return os; } template struct memory { std::vector> data; size_t depth() const { return data.size(); } memory() = delete; explicit memory(size_t depth) : data(depth) {} memory(const memory &) = delete; memory &operator=(const memory &) = delete; // The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it // into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't // construct an initializer_list in a constexpr (or something) and so if you try to do that the whole thing is // first copied on the stack (probably overflowing it) and then again into `data`. template struct init { size_t offset; value data[Size]; }; template explicit memory(size_t depth, const init &...init) : data(depth) { data.resize(depth); // This utterly reprehensible construct is the most reasonable way to apply a function to every element // of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list. auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...}; } // An operator for direct memory reads. May be used at any time during the simulation. const value &operator [](size_t index) const { assert(index < data.size()); return data[index]; } // An operator for direct memory writes. May only be used before the simulation is started. If used // after the simulation is started, the design may malfunction. value &operator [](size_t index) { assert(index < data.size()); return data[index]; } // A simple way to make a writable memory would be to use an array of wires instead of an array of values. // However, there are two significant downsides to this approach: first, it has large overhead (2× space // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port // priorities. Although in principle write ports could be ordered or conditionally enabled in generated // code based on their priorities and selected addresses, the feedback arc set problem is computationally // expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer) // a particular write port evaluation order. // // The approach used here instead is to queue writes into a buffer during the eval phase, then perform // the writes during the commit phase in the priority order. This approach has low overhead, with both space // and time proportional to the amount of write ports. Because virtually every memory in a practical design // has at most two write ports, linear search is used on every write, being the fastest and simplest approach. struct write { size_t index; value val; value mask; int priority; }; std::vector write_queue; void update(size_t index, const value &val, const value &mask, int priority = 0) { assert(index < data.size()); write_queue.emplace_back(write { index, val, mask, priority }); } bool commit() { bool changed = false; std::sort(write_queue.begin(), write_queue.end(), [](const write &a, const write &b) { return a.priority < b.priority; }); for (const write &entry : write_queue) { value elem = data[entry.index]; elem = elem.update(entry.val, entry.mask); changed |= (data[entry.index] != elem); data[entry.index] = elem; } write_queue.clear(); return changed; } }; struct metadata { const enum { MISSING = 0, UINT = 1, SINT = 2, STRING = 3, DOUBLE = 4, } value_type; // In debug mode, using the wrong .as_*() function will assert. // In release mode, using the wrong .as_*() function will safely return a default value. union { const unsigned uint_value = 0; const signed sint_value; }; const std::string string_value = ""; const double double_value = 0.0; metadata() : value_type(MISSING) {} metadata(unsigned value) : value_type(UINT), uint_value(value) {} metadata(signed value) : value_type(SINT), sint_value(value) {} metadata(const std::string &value) : value_type(STRING), string_value(value) {} metadata(const char *value) : value_type(STRING), string_value(value) {} metadata(double value) : value_type(DOUBLE), double_value(value) {} metadata(const metadata &) = default; metadata &operator=(const metadata &) = delete; unsigned as_uint() const { assert(value_type == UINT); return uint_value; } signed as_sint() const { assert(value_type == SINT); return sint_value; } const std::string &as_string() const { assert(value_type == STRING); return string_value; } double as_double() const { assert(value_type == DOUBLE); return double_value; } }; typedef std::map metadata_map; struct module { module() {} virtual ~module() {} module(const module &) = delete; module &operator=(const module &) = delete; virtual bool eval() = 0; virtual bool commit() = 0; size_t step() { size_t deltas = 0; bool converged = false; do { converged = eval(); deltas++; } while (commit() && !converged); return deltas; } }; } // namespace cxxrtl // Definitions of internal Yosys cells. Other than the functions in this namespace, cxxrtl is fully generic // and indepenent of Yosys implementation details. // // The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these // functions. All of Yosys arithmetic and logical cells perform sign or zero extension on their operands, // whereas basic operations on arbitrary width values require operands to be of the same width. These functions // bridge the gap by performing the necessary casts. They are named similar to `cell_A[B]`, where A and B are `u` // if the corresponding operand is unsigned, and `s` if it is signed. namespace cxxrtl_yosys { using namespace cxxrtl; // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own. template constexpr T max(const T &a, const T &b) { return a > b ? a : b; } // Logic operations template value not_u(const value &a) { return a.template zcast().bit_not(); } template value not_s(const value &a) { return a.template scast().bit_not(); } template value logic_not_u(const value &a) { return value { a ? 0u : 1u }; } template value logic_not_s(const value &a) { return value { a ? 0u : 1u }; } template value reduce_and_u(const value &a) { return value { a.bit_not().is_zero() ? 1u : 0u }; } template value reduce_and_s(const value &a) { return value { a.bit_not().is_zero() ? 1u : 0u }; } template value reduce_or_u(const value &a) { return value { a ? 1u : 0u }; } template value reduce_or_s(const value &a) { return value { a ? 1u : 0u }; } template value reduce_xor_u(const value &a) { return value { (a.ctpop() % 2) ? 1u : 0u }; } template value reduce_xor_s(const value &a) { return value { (a.ctpop() % 2) ? 1u : 0u }; } template value reduce_xnor_u(const value &a) { return value { (a.ctpop() % 2) ? 0u : 1u }; } template value reduce_xnor_s(const value &a) { return value { (a.ctpop() % 2) ? 0u : 1u }; } template value reduce_bool_u(const value &a) { return value { a ? 1u : 0u }; } template value reduce_bool_s(const value &a) { return value { a ? 1u : 0u }; } template value and_uu(const value &a, const value &b) { return a.template zcast().bit_and(b.template zcast()); } template value and_ss(const value &a, const value &b) { return a.template scast().bit_and(b.template scast()); } template value or_uu(const value &a, const value &b) { return a.template zcast().bit_or(b.template zcast()); } template value or_ss(const value &a, const value &b) { return a.template scast().bit_or(b.template scast()); } template value xor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()); } template value xor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()); } template value xnor_uu(const value &a, const value &b) { return a.template zcast().bit_xor(b.template zcast()).bit_not(); } template value xnor_ss(const value &a, const value &b) { return a.template scast().bit_xor(b.template scast()).bit_not(); } template value logic_and_uu(const value &a, const value &b) { return value { (bool(a) & bool(b)) ? 1u : 0u }; } template value logic_and_ss(const value &a, const value &b) { return value { (bool(a) & bool(b)) ? 1u : 0u }; } template value logic_or_uu(const value &a, const value &b) { return value { (bool(a) | bool(b)) ? 1u : 0u }; } template value logic_or_ss(const value &a, const value &b) { return value { (bool(a) | bool(b)) ? 1u : 0u }; } template value shl_uu(const value &a, const value &b) { return a.template zcast().template shl(b); } template value shl_su(const value &a, const value &b) { return a.template scast().template shl(b); } template value sshl_uu(const value &a, const value &b) { return a.template zcast().template shl(b); } template value sshl_su(const value &a, const value &b) { return a.template scast().template shl(b); } template value shr_uu(const value &a, const value &b) { return a.template shr(b).template zcast(); } template value shr_su(const value &a, const value &b) { return a.template shr(b).template scast(); } template value sshr_uu(const value &a, const value &b) { return a.template shr(b).template zcast(); } template value sshr_su(const value &a, const value &b) { return a.template shr(b).template scast(); } template value shift_uu(const value &a, const value &b) { return shr_uu(a, b); } template value shift_su(const value &a, const value &b) { return shr_su(a, b); } template value shift_us(const value &a, const value &b) { return b.is_neg() ? shl_uu(a, b.template sext().neg()) : shr_uu(a, b); } template value shift_ss(const value &a, const value &b) { return b.is_neg() ? shl_su(a, b.template sext().neg()) : shr_su(a, b); } template value shiftx_uu(const value &a, const value &b) { return shift_uu(a, b); } template value shiftx_su(const value &a, const value &b) { return shift_su(a, b); } template value shiftx_us(const value &a, const value &b) { return shift_us(a, b); } template value shiftx_ss(const value &a, const value &b) { return shift_ss