/*
 * yosys -- Yosys Open SYnthesis Suite
 *
 * Copyright (C) 2012 Claire Xenia Wolf
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

#include "kernel/yosys.h"
#include "kernel/mem.h"
#include "kernel/ffinit.h"

USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN

// Parsed contents of a bram rules file: the available bram resources (one
// entry per variant) and the ordered list of match rules that select them.
// The struct also carries the tokenizer state used while parsing.
struct rules_t
{
	// One port of a bram cell, together with the memory port that has been
	// mapped onto it (if any).
	struct portinfo_t {
		// group: index into the per-group vectors of bram_t (printed as 'A' + group)
		// index: port number within its group (printed as index + 1)
		// dupidx: starts at 0 in make_portinfos(); not modified inside rules_t
		int group, index, dupidx;
		// Per-group configuration values copied from bram_t by make_portinfos().
		int wrmode, enable, transp, clocks, clkpol;

		SigBit sig_clock;
		SigSpec sig_addr, sig_data, sig_en;
		bool effective_clkpol;
		bool make_transp;
		bool make_outreg;
		// Initialised to -1 by make_portinfos().
		int mapped_port;
	};

	// One variant of a bram resource as described by a "bram ... endbram" section.
	struct bram_t {
		IdString name;
		int variant;

		int groups, abits, dbits, init;
		// One entry per port group.
		vector<int> ports, wrmode, enable, transp, clocks, clkpol;

		// Logs this description in the same statement layout the parser accepts.
		void dump_config() const
		{
			auto dump_row = [](const char *label, const vector<int> &values) {
				log("%s", label);
				for (int v : values)
					log("%4d", v);
				log("\n");
			};

			log(" bram %s # variant %d\n", log_id(name), variant);
			log(" init %d\n", init);
			log(" abits %d\n", abits);
			log(" dbits %d\n", dbits);
			log(" groups %d\n", groups);

			dump_row(" ports ", ports);
			dump_row(" wrmode", wrmode);
			dump_row(" enable", enable);
			dump_row(" transp", transp);
			dump_row(" clocks", clocks);
			dump_row(" clkpol", clkpol);

			log(" endbram\n");
		}

		// Aborts via log_error() unless every per-group vector has exactly
		// `groups` entries and no group asks for more enable bits than `dbits`.
		void check_vectors() const
		{
			if (groups != GetSize(ports))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'ports'.\n", log_id(name), variant, groups, GetSize(ports));
			if (groups != GetSize(wrmode))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'wrmode'.\n", log_id(name), variant, groups, GetSize(wrmode));
			if (groups != GetSize(enable))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'enable'.\n", log_id(name), variant, groups, GetSize(enable));
			if (groups != GetSize(transp))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'transp'.\n", log_id(name), variant, groups, GetSize(transp));
			if (groups != GetSize(clocks))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'clocks'.\n", log_id(name), variant, groups, GetSize(clocks));
			if (groups != GetSize(clkpol))
				log_error("Bram %s variant %d has %d groups but only %d entries in 'clkpol'.\n", log_id(name), variant, groups, GetSize(clkpol));

			// Walk by index so the message names the offending group; the
			// previous range-for never advanced its group counter and always
			// reported group 0.
			for (int group = 0; group < GetSize(enable); group++)
				if (enable[group] > dbits)
					log_error("Bram %s variant %d group %d has %d enable bits but only %d dbits.\n", log_id(name), variant, group, enable[group], dbits);
		}

		// Expands the per-group description into one portinfo_t per port,
		// ordered by group and then by port index, all unmapped.
		vector<portinfo_t> make_portinfos() const
		{
			vector<portinfo_t> portinfos;
			for (int i = 0; i < groups; i++)
			for (int j = 0; j < ports[i]; j++) {
				portinfo_t pi;
				pi.group = i;
				pi.index = j;
				pi.dupidx = 0;
				pi.wrmode = wrmode[i];
				pi.enable = enable[i];
				pi.transp = transp[i];
				pi.clocks = clocks[i];
				pi.clkpol = clkpol[i];
				pi.mapped_port = -1;
				pi.make_transp = false;
				pi.make_outreg = false;
				pi.effective_clkpol = false;
				portinfos.push_back(pi);
			}
			return portinfos;
		}

		// Records in `variant_params` every setting of this variant that
		// differs from `other` (another variant of the same bram). Entries are
		// only added or overwritten, never removed. Variants must agree on
		// 'groups' and on the port count of each group, otherwise log_error().
		void find_variant_params(dict<IdString, Const> &variant_params, const bram_t &other) const
		{
			log_assert(name == other.name);

			if (groups != other.groups)
				log_error("Bram %s variants %d and %d have different values for 'groups'.\n", log_id(name), variant, other.variant);

			if (abits != other.abits)
				variant_params[ID::CFG_ABITS] = abits;
			if (dbits != other.dbits)
				variant_params[ID::CFG_DBITS] = dbits;
			if (init != other.init)
				variant_params[ID::CFG_INIT] = init;

			for (int i = 0; i < groups; i++)
			{
				if (ports[i] != other.ports[i])
					log_error("Bram %s variants %d and %d have different number of %c-ports.\n", log_id(name), variant, other.variant, 'A'+i);
				if (wrmode[i] != other.wrmode[i])
					variant_params[stringf("\\CFG_WRMODE_%c", 'A' + i)] = wrmode[i];
				if (enable[i] != other.enable[i])
					variant_params[stringf("\\CFG_ENABLE_%c", 'A' + i)] = enable[i];
				if (transp[i] != other.transp[i])
					variant_params[stringf("\\CFG_TRANSP_%c", 'A' + i)] = transp[i];
				if (clocks[i] != other.clocks[i])
					variant_params[stringf("\\CFG_CLOCKS_%c", 'A' + i)] = clocks[i];
				if (clkpol[i] != other.clkpol[i])
					variant_params[stringf("\\CFG_CLKPOL_%c", 'A' + i)] = clkpol[i];
			}
		}
	};

	// One "match ... endmatch" section.
	struct match_t {
		IdString name;
		// Property name -> bound, from "min <prop> <n>" / "max <prop> <n>".
		dict<string, int> min_limits, max_limits;
		bool or_next_if_better, make_transp, make_outreg;
		// 0 when unset, otherwise the upper-case letter given to "shuffle_enable".
		char shuffle_enable;
		// One inner vector per "attribute" statement; each term is
		// (must_exist, attribute name, expected value).
		vector<vector<std::tuple<bool, IdString, Const>>> attributes;
	};

	bool attr_icase;
	dict<IdString, vector<bram_t>> brams;
	vector<match_t> matches;

	// Lower-cases `value` when attr_icase is set; otherwise returns it unchanged.
	std::string map_case(std::string value) const
	{
		if (attr_icase) {
			// tolower() is only defined for values representable as
			// unsigned char (or EOF), so avoid passing a negative char.
			for (char &c : value)
				c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
		}
		return value;
	}

	// String constants go through the std::string overload; any other
	// constant is returned unchanged.
	RTLIL::Const map_case(RTLIL::Const value) const
	{
		if (value.flags & RTLIL::CONST_FLAG_STRING)
			return map_case(value.decode_string());
		return value;
	}

	// Tokenizer state: `tokens` and `labels` describe the most recently read
	// non-empty line, `linecount` is its 1-based line number.
	std::ifstream infile;
	vector<string> tokens;
	vector<string> labels;
	int linecount;

	// Reports a parse error at the current line; log_error() does not return.
	void syntax_error()
	{
		if (tokens.empty())
			log_error("Unexpected end of rules file in line %d.\n", linecount);
		log_error("Syntax error in rules file line %d.\n", linecount);
	}

	// Reads lines until one yields at least one token. Tokens starting with
	// '@' are collected (without the '@') in `labels`; a token starting with
	// '#' ends the line. Returns false once the input is exhausted.
	bool next_line()
	{
		string line;
		while (std::getline(infile, line)) {
			tokens.clear();
			labels.clear();
			linecount++;
			for (string tok = next_token(line); !tok.empty(); tok = next_token(line)) {
				if (tok[0] == '@') {
					labels.push_back(tok.substr(1));
					continue;
				}
				if (tok[0] == '#')
					break;
				tokens.push_back(tok);
			}
			if (!tokens.empty())
				return true;
		}
		return false;
	}

	// Matches "<stmt> <int>" on the current line and stores the atoi() result.
	bool parse_single_int(const char *stmt, int &value)
	{
		if (GetSize(tokens) == 2 && tokens[0] == stmt) {
			value = atoi(tokens[1].c_str());
			return true;
		}
		return false;
	}

	// Matches "<stmt> <int> [<int> ...]" on the current line and replaces
	// `value` with the parsed integers.
	bool parse_int_vect(const char *stmt, vector<int> &value)
	{
		if (GetSize(tokens) >= 2 && tokens[0] == stmt) {
			value.resize(GetSize(tokens)-1);
			for (int i = 1; i < GetSize(tokens); i++)
				value[i-1] = atoi(tokens[i].c_str());
			return true;
		}
		return false;
	}

	// Parses a "bram <name> ... endbram" section; the "bram" line is the
	// current line on entry. Lines without a label are shared by all
	// variants; every distinct label produces one variant made of the shared
	// lines followed by the lines carrying that label. Without any label a
	// single variant is produced.
	void parse_bram()
	{
		// Validate the token count first: reading tokens[1] on a bare
		// "bram" line would index past the end of the vector.
		if (GetSize(tokens) != 2)
			syntax_error();

		IdString bram_name = RTLIL::escape_id(tokens[1]);

		vector<vector<string>> lines_nolabels;
		std::map<string, vector<vector<string>>> lines_labels;

		while (next_line())
		{
			if (GetSize(tokens) == 1 && tokens[0] == "endbram")
				break;
			if (labels.empty())
				lines_nolabels.push_back(tokens);
			for (auto lab : labels)
				lines_labels[lab].push_back(tokens);
		}

		std::map<string, vector<vector<string>>> variant_lines;

		if (lines_labels.empty())
			variant_lines[""] = lines_nolabels;
		for (auto &it : lines_labels) {
			variant_lines[it.first] = lines_nolabels;
			variant_lines[it.first].insert(variant_lines[it.first].end(), it.second.begin(), it.second.end());
		}

		for (auto &it : variant_lines)
		{
			bram_t data;
			data.name = bram_name;
			// Variants are numbered from 1 in the order they are registered.
			data.variant = GetSize(brams[data.name]) + 1;
			data.groups = 0;
			data.abits = 0;
			data.dbits = 0;
			data.init = 0;

			for (auto &line_tokens : it.second)
			{
				// The parse_* helpers operate on the member `tokens`.
				tokens = line_tokens;

				if (parse_single_int("groups", data.groups))
					continue;
				if (parse_single_int("abits", data.abits))
					continue;
				if (parse_single_int("dbits", data.dbits))
					continue;
				if (parse_single_int("init", data.init))
					continue;

				if (parse_int_vect("ports", data.ports))
					continue;
				if (parse_int_vect("wrmode", data.wrmode))
					continue;
				if (parse_int_vect("enable", data.enable))
					continue;
				if (parse_int_vect("transp", data.transp))
					continue;
				if (parse_int_vect("clocks", data.clocks))
					continue;
				if (parse_int_vect("clkpol", data.clkpol))
					continue;

				syntax_error();
			}

			data.check_vectors();
			brams[data.name].push_back(data);
		}
	}

	// Parses a "match <name> ... endmatch" section; the "match" line is the
	// current line on entry. The rule is appended to `matches` only when
	// "endmatch" is reached.
	void parse_match()
	{
		if (GetSize(tokens) != 2)
			syntax_error();

		match_t data;
		data.name = RTLIL::escape_id(tokens[1]);
		data.or_next_if_better = false;
		data.make_transp = false;
		data.make_outreg = false;
		data.shuffle_enable = 0;

		while (next_line())
		{
			if (!labels.empty())
				syntax_error();

			if (GetSize(tokens) == 1 && tokens[0] == "endmatch") {
				matches.push_back(data);
				break;
			}

			if (GetSize(tokens) == 3 && tokens[0] == "min") {
				data.min_limits[tokens[1]] = atoi(tokens[2].c_str());
				continue;
			}

			if (GetSize(tokens) == 3 && tokens[0] == "max") {
				data.max_limits[tokens[1]] = atoi(tokens[2].c_str());
				continue;
			}

			if (GetSize(tokens) == 2 && tokens[0] == "shuffle_enable" && GetSize(tokens[1]) == 1 && 'A' <= tokens[1][0] && tokens[1][0] <= 'Z') {
				data.shuffle_enable = tokens[1][0];
				continue;
			}

			if (GetSize(tokens) == 1 && tokens[0] == "make_transp") {
				data.make_transp = true;
				continue;
			}

			// make_outreg also turns on make_transp.
			if (GetSize(tokens) == 1 && tokens[0] == "make_outreg") {
				data.make_transp = true;
				data.make_outreg = true;
				continue;
			}

			if (GetSize(tokens) == 1 && tokens[0] == "or_next_if_better") {
				data.or_next_if_better = true;
				continue;
			}

			// attribute [!]<name>[=<value>] ...
			if (GetSize(tokens) >= 2 && tokens[0] == "attribute") {
				data.attributes.emplace_back();
				for (int idx = 1; idx < GetSize(tokens); idx++) {
					const string &term = tokens[idx];
					size_t c1 = term[0] == '!' ? 1 : 0;
					size_t c2 = term.find('=');
					bool exists = (c1 == 0);
					// substr() takes a length, not an end position: the
					// name spans [c1, c2). Passing c2 directly kept the
					// '=' in the key for terms of the form "!name=value".
					size_t key_len = c2 != std::string::npos ? c2 - c1 : std::string::npos;
					IdString key = RTLIL::escape_id(term.substr(c1, key_len));
					// A term without "=<value>" compares against Const(1).
					Const val = c2 != std::string::npos ? term.substr(c2+1) : RTLIL::Const(1);

					data.attributes.back().emplace_back(exists, key, map_case(val));
				}
				continue;
			}

			syntax_error();
		}
	}

	// Loads the rules file `filename`, filling `attr_icase`, `brams` and
	// `matches`. Any parse problem ends in log_error().
	void parse(string filename)
	{
		rewrite_filename(filename);
		infile.open(filename);
		linecount = 0;
		attr_icase = false;

		if (infile.fail())
			log_error("Can't open rules file `%s'.\n", filename.c_str());

		while (next_line())
		{
			// Labels are only meaningful inside a bram section.
			if (!labels.empty())
				syntax_error();

			if (GetSize(tokens) == 2 && tokens[0] == "attr_icase") {
				attr_icase = atoi(tokens[1].c_str());
				continue;
			}

			if (tokens[0] == "bram") {
				parse_bram();
				continue;
			}

			if (tokens[0] == "match") {
				parse_match();
				continue;
			}

			syntax_error();
		}

		infile.close();
	}
};

bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict &match_properties, int mode)
{
	// We will modify ports — make a copy of the structure.
// Body of replace_memory(); the signature and opening brace precede this point.
//
// Tries to map the memory onto bram cells of type `bram` under rule `match`:
//   1. optionally reorders the data bits (match.shuffle_enable),
//   2. assigns every write port, then every read port, to a bram port
//      (duplicating the bram ports when read ports run out),
//   3. for mode <= 1 recomputes match_properties and re-checks the rule's
//      min/max/attribute requirements; mode == 1 returns true at that point
//      without creating any cells,
//   4. otherwise creates the bram cells plus glue logic and removes the memory.
// Returns false as soon as a port cannot be mapped or a requirement fails.
	Mem mem(orig_mem);
	Module *module = mem.module;

	auto portinfos = bram.make_portinfos();
	// Number of copies of the bram port list currently in portinfos.
	int dup_count = 1;

	// State for the soft transparency logic added further below.
	pair make_transp_clk;
	bool enable_make_transp = false;
	int make_transp_enbits = 0;

	// Bindings made so far, keyed by the bram's clocks / clkpol / transp values.
	dict> clock_domains;
	dict clock_polarities;
	dict read_transp;
	// clocks / clkpol values that are used by write-mode bram ports.
	pool clocks_wr_ports;
	pool clkpol_wr_ports;
	int clocks_max = 0;
	int clkpol_max = 0;
	int transp_max = 0;

	// clkpol values 0 and 1 are bound up front; other values get bound when a
	// port using them is mapped.
	clock_polarities[0] = false;
	clock_polarities[1] = true;

	for (auto &pi : portinfos) {
		if (pi.wrmode) {
			clocks_wr_ports.insert(pi.clocks);
			if (pi.clkpol > 1)
				clkpol_wr_ports.insert(pi.clkpol);
		}
		clocks_max = max(clocks_max, pi.clocks);
		clkpol_max = max(clkpol_max, pi.clkpol);
		transp_max = max(transp_max, pi.transp);
	}

	log(" Mapping to bram type %s (variant %d):\n", log_id(bram.name), bram.variant);
	// bram.dump_config();

	// Split the init data into one Const per memory word.
	bool cell_init = !mem.inits.empty();
	vector initdata;

	if (cell_init) {
		Const initparam = mem.get_init_data();
		initdata.reserve(mem.size);
		for (int i=0; i < mem.size; i++)
			initdata.push_back(initparam.extract(mem.width*i, mem.width, State::Sx));
	}

	// NOTE(review): portinfos holds one entry per port (see make_portinfos()),
	// while shuffle_enable is a letter. The lookups below therefore only hit
	// the lettered group when every earlier group has exactly one port, and
	// .at() rejects letters beyond the number of ports — confirm intent.
	if (match.shuffle_enable && bram.dbits >= portinfos.at(match.shuffle_enable - 'A').enable*2 && portinfos.at(match.shuffle_enable - 'A').enable > 0 && !mem.wr_ports.empty())
	{
		int bucket_size = bram.dbits / portinfos.at(match.shuffle_enable - 'A').enable;
		log(" Shuffle bit order to accommodate enable buckets of size %d..\n", bucket_size);

		// extract unshuffled data/enable bits
		std::vector old_wr_en;
		std::vector old_wr_data;
		std::vector old_rd_data;

		for (auto &port : mem.wr_ports) {
			old_wr_en.push_back(port.en);
			old_wr_data.push_back(port.data);
		}

		for (auto &port : mem.rd_ports)
			old_rd_data.push_back(port.data);

		// analyze enable structure
		// Bits are grouped by the tuple of enable signals (one per write
		// port) that control them; en_order keeps first-seen order.
		std::vector en_order;
		dict> bits_wr_en;

		for (int i = 0; i < mem.width; i++) {
			SigSpec sig;
			for (auto &port : mem.wr_ports)
				sig.append(port.en[i]);
			if (bits_wr_en.count(sig) == 0)
				en_order.push_back(sig);
			bits_wr_en[sig].push_back(i);
		}

		// re-create memory ports
		std::vector new_wr_en(GetSize(old_wr_en));
		std::vector new_wr_data(GetSize(old_wr_data));
		std::vector new_rd_data(GetSize(old_rd_data));
		std::vector> new_initdata;
		std::vector shuffle_map;

		if (cell_init)
			new_initdata.resize(mem.size);

		for (auto &it : en_order)
		{
			auto &bits = bits_wr_en.at(it);
			// Each enable group is rounded up to a whole number of buckets.
			int buckets = (GetSize(bits) + bucket_size - 1) / bucket_size;
			int fillbits = buckets*bucket_size - GetSize(bits);
			SigBit fillbit;

			for (int i = 0; i < GetSize(bits); i++) {
				for (int j = 0; j < GetSize(mem.wr_ports); j++) {
					new_wr_en[j].append(old_wr_en[j][bits[i]]);
					new_wr_data[j].append(old_wr_data[j][bits[i]]);
					// Ends up as the last write port's enable for the last bit of the group.
					fillbit = old_wr_en[j][bits[i]];
				}
				for (int j = 0; j < GetSize(mem.rd_ports); j++)
					new_rd_data[j].append(old_rd_data[j][bits[i]]);
				if (cell_init) {
					for (int j = 0; j < mem.size; j++)
						new_initdata[j].push_back(initdata[j][bits[i]]);
				}
				shuffle_map.push_back(bits[i]);
			}

			// Padding bits: write data 0, read data and init undefined,
			// marked with -1 in shuffle_map.
			for (int i = 0; i < fillbits; i++) {
				for (int j = 0; j < GetSize(mem.wr_ports); j++) {
					new_wr_en[j].append(fillbit);
					new_wr_data[j].append(State::S0);
				}
				for (int j = 0; j < GetSize(mem.rd_ports); j++)
					new_rd_data[j].append(State::Sx);
				if (cell_init) {
					for (int j = 0; j < mem.size; j++)
						new_initdata[j].push_back(State::Sx);
				}
				shuffle_map.push_back(-1);
			}
		}

		log(" Results of bit order shuffling:");
		for (int v : shuffle_map)
			log(" %d", v);
		log("\n");

		// update mem_*, wr_*, and rd_* variables
		// wr_ports is non-empty here (checked in the enclosing condition).
		mem.width = GetSize(new_wr_en.front());

		for (int i = 0; i < GetSize(mem.wr_ports); i++) {
			auto &port = mem.wr_ports[i];
			port.en = new_wr_en[i];
			port.data = new_wr_data[i];
		}

		for (int i = 0; i < GetSize(mem.rd_ports); i++) {
			auto &port = mem.rd_ports[i];
			port.data = new_rd_data[i];
		}

		if (cell_init) {
			for (int i = 0; i < mem.size; i++)
				initdata[i] = Const(new_initdata[i]);
		}
	}

	// assign write ports
	// bram_port_i is shared across iterations of the outer loop: each write
	// port continues searching after the bram port taken by the previous one.
	pair wr_clkdom;
	for (int cell_port_i = 0, bram_port_i = 0; cell_port_i < GetSize(mem.wr_ports); cell_port_i++)
	{
		auto &port = mem.wr_ports[cell_port_i];

		pair clkdom(port.clk, port.clk_polarity);
		if (!port.clk_enable)
			clkdom = pair(State::S1, false);
		// Remembers the clock domain of the last write port processed.
		wr_clkdom = clkdom;
		log(" Write port #%d is in clock domain %s%s.\n", cell_port_i, clkdom.second ? "" : "!", port.clk_enable ? log_signal(clkdom.first) : "~async~");

		for (; bram_port_i < GetSize(portinfos); bram_port_i++)
		{
			auto &pi = portinfos[bram_port_i];
			// Updated for every bram port examined, before any filtering.
			make_transp_enbits = pi.enable;
			make_transp_clk = clkdom;

			// The label sits on the `continue`, so `goto skip_bram_wport`
			// moves on to the next bram port.
			if (pi.wrmode != 1)
		skip_bram_wport:
				continue;

			if (port.clk_enable) {
				if (pi.clocks == 0) {
					log(" Bram port %c%d has incompatible clock type.\n", pi.group + 'A', pi.index + 1);
					goto skip_bram_wport;
				}
				if (clock_domains.count(pi.clocks) && clock_domains.at(pi.clocks) != clkdom) {
					log(" Bram port %c%d is in a different clock domain.\n", pi.group + 'A', pi.index + 1);
					goto skip_bram_wport;
				}
				if (clock_polarities.count(pi.clkpol) && clock_polarities.at(pi.clkpol) != port.clk_polarity) {
					log(" Bram port %c%d has incompatible clock polarity.\n", pi.group + 'A', pi.index + 1);
					goto skip_bram_wport;
				}
			} else {
				if (pi.clocks != 0) {
					log(" Bram port %c%d has incompatible clock type.\n", pi.group + 'A', pi.index + 1);
					goto skip_bram_wport;
				}
			}

			// Build the bram enable signal: one bit per run of
			// bram.dbits / pi.enable data bits. Every data bit must use the
			// same enable as the first bit of its run (or constant 1 when
			// the bram port has no enable bits).
			SigSpec sig_en;
			SigBit last_en_bit = State::S1;
			for (int i = 0; i < mem.width; i++) {
				if (pi.enable && i % (bram.dbits / pi.enable) == 0) {
					last_en_bit = port.en[i];
					sig_en.append(last_en_bit);
				}
				if (last_en_bit != port.en[i]) {
					log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
					goto skip_bram_wport;
				}
			}

			log(" Mapped to bram port %c%d.\n", pi.group + 'A', pi.index + 1);
			pi.mapped_port = cell_port_i;

			if (port.clk_enable) {
				clock_domains[pi.clocks] = clkdom;
				clock_polarities[pi.clkpol] = clkdom.second;
				pi.sig_clock = clkdom.first;
				pi.effective_clkpol = clkdom.second;
			}

			pi.sig_en = sig_en;
			pi.sig_addr = port.addr;
			pi.sig_data = port.data;

			// Step past the bram port just taken (the goto skips the loop increment).
			bram_port_i++;
			goto mapped_wr_port;
		}

		log(" Failed to map write port #%d.\n", cell_port_i);
		return false;
	mapped_wr_port:;
	}

	// housekeeping stuff for growing more read ports and restarting read port assignments
	int grow_read_ports_cursor = -1;
	bool try_growing_more_read_ports = false;
	// Snapshot of the bindings made by the write ports, restored on every restart.
	auto backup_clock_domains = clock_domains;
	auto backup_clock_polarities = clock_polarities;

	// This block is only entered through `goto grow_read_ports` below.
	if (0) {
grow_read_ports:;
		vector new_portinfos;
		for (auto &pi : portinfos) {
			// Forget all read port assignments.
			if (pi.wrmode == 0) {
				pi.mapped_port = -1;
				pi.sig_clock = SigBit();
				pi.sig_addr = SigSpec();
				pi.sig_data = SigSpec();
				pi.sig_en = SigSpec();
				pi.make_outreg = false;
				pi.make_transp = false;
			}
			new_portinfos.push_back(pi);
			// Append one more copy of every port from the newest duplicate.
			// In the copy, clocks/clkpol values not in the write-port sets
			// (presumably a membership test — verify pool::operator[]) and
			// transp values above 1 are shifted into a fresh range.
			if (pi.dupidx == dup_count-1) {
				if (pi.clocks && !clocks_wr_ports[pi.clocks])
					pi.clocks += clocks_max;
				if (pi.clkpol > 1 && !clkpol_wr_ports[pi.clkpol])
					pi.clkpol += clkpol_max;
				if (pi.transp > 1)
					pi.transp += transp_max;
				pi.dupidx++;
				new_portinfos.push_back(pi);
			}
		}
		try_growing_more_read_ports = false;
		portinfos.swap(new_portinfos);
		clock_domains = backup_clock_domains;
		clock_polarities = backup_clock_polarities;
		dup_count++;
	}

	// transp values 0 and 1 are bound up front, as with clkpol above.
	read_transp.clear();
	read_transp[0] = false;
	read_transp[1] = true;

	// assign read ports
	for (int cell_port_i = 0; cell_port_i < GetSize(mem.rd_ports); cell_port_i++)
	{
		auto &port = mem.rd_ports[cell_port_i];
		bool transp = port.transparent;

		// Without write ports the port is treated as non-transparent.
		if (mem.wr_ports.empty())
			transp = false;

		pair clkdom(port.clk, port.clk_polarity);
		if (!port.clk_enable)
			clkdom = pair(State::S1, false);

		log(" Read port #%d is in clock domain %s%s.\n", cell_port_i, clkdom.second ? "" : "!", port.clk_enable ? log_signal(clkdom.first) : "~async~");

		// Unlike the write ports, every read port searches from the first bram port.
		for (int bram_port_i = 0; bram_port_i < GetSize(portinfos); bram_port_i++)
		{
			auto &pi = portinfos[bram_port_i];

			// Label on the `continue`: `goto skip_bram_rport` tries the next bram port.
			if (pi.wrmode != 0 || pi.mapped_port >= 0)
		skip_bram_rport:
				continue;

			if (port.clk_enable) {
				if (pi.clocks == 0) {
					// Unclocked bram port: acceptable only if the rule allows
					// an external output register; the remaining clocked-port
					// checks are skipped in that case.
					if (match.make_outreg) {
						pi.make_outreg = true;
						goto skip_bram_rport_clkcheck;
					}
					log(" Bram port %c%d.%d has incompatible clock type.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (clock_domains.count(pi.clocks) && clock_domains.at(pi.clocks) != clkdom) {
					log(" Bram port %c%d.%d is in a different clock domain.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (clock_polarities.count(pi.clkpol) && clock_polarities.at(pi.clkpol) != port.clk_polarity) {
					log(" Bram port %c%d.%d has incompatible clock polarity.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (port.en != State::S1 && pi.enable == 0) {
					log(" Bram port %c%d.%d has no read enable input.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (port.arst != State::S0) {
					log(" Bram port %c%d.%d has no async reset input.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (port.srst != State::S0) {
					log(" Bram port %c%d.%d has no sync reset input.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (!port.init_value.is_fully_undef()) {
					log(" Bram port %c%d.%d has no initial value support.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
				if (read_transp.count(pi.transp) && read_transp.at(pi.transp) != transp) {
					// Transparency mismatch: emulate it with extra logic if
					// the rule allows it and there is at most one write port.
					if (match.make_transp && GetSize(mem.wr_ports) <= 1) {
						pi.make_transp = true;
						if (pi.clocks != 0) {
							if (GetSize(mem.wr_ports) == 1 && wr_clkdom != clkdom) {
								log(" Bram port %c%d.%d cannot have soft transparency logic added as read and write clock domains differ.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
								goto skip_bram_rport;
							}
							enable_make_transp = true;
						}
					} else {
						log(" Bram port %c%d.%d has incompatible read transparency.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
						goto skip_bram_rport;
					}
				}
			} else {
				if (pi.clocks != 0) {
					log(" Bram port %c%d.%d has incompatible clock type.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
					goto skip_bram_rport;
				}
			}

		skip_bram_rport_clkcheck:
			log(" Mapped to bram port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
			pi.mapped_port = cell_port_i;

			if (port.clk_enable && !pi.make_outreg) {
				clock_domains[pi.clocks] = clkdom;
				clock_polarities[pi.clkpol] = clkdom.second;
				if (!pi.make_transp)
					read_transp[pi.transp] = transp;
				pi.sig_clock = clkdom.first;
				pi.sig_en = port.en;
				pi.effective_clkpol = clkdom.second;
			}

			pi.sig_addr = port.addr;
			pi.sig_data = port.data;

			// Growing is only retried after mapping got further than in any
			// earlier attempt.
			if (grow_read_ports_cursor < cell_port_i) {
				grow_read_ports_cursor = cell_port_i;
				try_growing_more_read_ports = true;
			}

			goto mapped_rd_port;
		}

		log(" Failed to map read port #%d.\n", cell_port_i);
		if (try_growing_more_read_ports) {
			log(" Growing more read ports by duplicating bram cells.\n");
			goto grow_read_ports;
		}
		return false;
	mapped_rd_port:;
	}

	// update properties and re-check conditions
	if (mode <= 1)
	{
		match_properties["dups"] = dup_count;
		match_properties["waste"] = match_properties["dups"] * match_properties["bwaste"];

		// Cells needed per duplicate: data-width slices times address ranges.
		int cells = ((mem.width + bram.dbits - 1) / bram.dbits) * ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
		match_properties["efficiency"] = (100 * match_properties["bits"]) / (dup_count * cells * bram.dbits * (1 << bram.abits));

		match_properties["dcells"] = ((mem.width + bram.dbits - 1) / bram.dbits);
		match_properties["acells"] = ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
		match_properties["cells"] = match_properties["dcells"] * match_properties["acells"] * match_properties["dups"];

		log(" Updated properties: dups=%d waste=%d efficiency=%d\n", match_properties["dups"], match_properties["waste"], match_properties["efficiency"]);

		for (auto it : match.min_limits) {
			if (!match_properties.count(it.first))
				log_error("Unknown property '%s' in match rule for bram type %s.\n", it.first.c_str(), log_id(match.name));
			if (match_properties[it.first] >= it.second)
				continue;
			log(" Rule for bram type %s rejected: requirement 'min %s %d' not met.\n", log_id(match.name), it.first.c_str(), it.second);
			return false;
		}

		for (auto it : match.max_limits) {
			if (!match_properties.count(it.first))
				log_error("Unknown property '%s' in match rule for bram type %s.\n", it.first.c_str(), log_id(match.name));
			if (match_properties[it.first] <= it.second)
				continue;
			log(" Rule for bram type %s rejected: requirement 'max %s %d' not met.\n", log_id(match.name), it.first.c_str(), it.second);
			return false;
		}

		// Every attribute statement must be satisfied by at least one of its terms.
		for (const auto &sums : match.attributes) {
			bool found = false;
			for (const auto &term : sums) {
				bool exists = std::get<0>(term);
				IdString key = std::get<1>(term);
				const Const &value = std::get<2>(term);
				auto it = mem.attributes.find(key);
				if (it == mem.attributes.end()) {
					// Attribute absent: satisfies only a negated term.
					if (exists)
						continue;
					found = true;
					break;
				}
				else if (!exists)
					continue;
				if (rules.map_case(it->second) != value)
					continue;
				found = true;
				break;
			}
			if (!found) {
				// Only the first term of the statement is shown in the message.
				std::stringstream ss;
				bool exists = std::get<0>(sums.front());
				if (!exists)
					ss << "!";
				IdString key = std::get<1>(sums.front());
				ss << log_id(key);
				const Const &value = rules.map_case(std::get<2>(sums.front()));
				if (exists && value != Const(1))
					ss << "=\"" << value.decode_string() << "\"";

				log(" Rule for bram type %s rejected: requirement 'attribute %s ...' not met.\n", log_id(match.name), ss.str().c_str());
				return false;
			}
		}

		// mode 1: report success without touching the design.
		if (mode == 1)
			return true;
	}

	// prepare variant parameters
	// Collected against every variant of this bram type (including itself).
	dict variant_params;
	for (auto &other_bram : rules.brams.at(bram.name))
		bram.find_variant_params(variant_params, other_bram);

	// Apply make_outreg where necessary.
	for (auto &pi : portinfos) {
		if (pi.make_outreg) {
			mem.extract_rdff(pi.mapped_port, initvals);
			// Re-read address and data from the port after the extraction.
			auto &port = mem.rd_ports[pi.mapped_port];
			pi.sig_addr = port.addr;
			pi.sig_data = port.data;
		}
	}

	// actually replace that memory cell

	// Read data signal -> (select bits, concatenated bram outputs), resolved
	// after all cells have been created.
	dict> dout_cache;

	// grid_d walks the data width in slices of bram.dbits bits.
	for (int grid_d = 0; grid_d*bram.dbits < mem.width; grid_d++)
	{
		// Wires carrying the write address/data/enables for the transparency bypass.
		SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q;
		vector mktr_wren;

		if (enable_make_transp) {
			mktr_wraddr = module->addWire(NEW_ID, bram.abits);
			mktr_wrdata = module->addWire(NEW_ID, bram.dbits);
			mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits);
			module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second);
			for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
				mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits));
		}

		// grid_a walks the address space in ranges of 1 << bram.abits words.
		for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
		for (int dupidx = 0; dupidx < dup_count; dupidx++)
		{
			Cell *c = module->addCell(module->uniquify(stringf("%s.%d.%d.%d", mem.memid.c_str(), grid_d, grid_a, dupidx)), bram.name);
			log(" Creating %s cell at grid position <%d %d %d>: %s\n", log_id(bram.name), grid_d, grid_a, dupidx, log_id(c));

			for (auto &vp : variant_params)
				c->setParam(vp.first, vp.second);

			if (cell_init)
			{
				int init_offset = grid_a*(1 << bram.abits);
				int init_shift = grid_d*bram.dbits;
				int init_size = (1 << bram.abits);
				Const initparam(State::Sx, init_size*bram.dbits);
				// Positions outside the memory's words or width stay undefined.
				for (int i = 0; i < init_size; i++) {
					State padding = State::Sx;
					for (int j = 0; j < bram.dbits; j++)
						if (init_offset+i < GetSize(initdata) && init_shift+j < GetSize(initdata[init_offset+i]))
							initparam[i*bram.dbits+j] = initdata[init_offset+i][init_shift+j];
						else
							initparam[i*bram.dbits+j] = padding;
				}
				c->setParam(ID::INIT, initparam);
			}

			for (auto &pi : portinfos)
			{
				if (pi.dupidx != dupidx)
					continue;

				// Port name prefix, e.g. "A1".
				string prefix = stringf("%c%d", pi.group + 'A', pi.index + 1);
				const char *pf = prefix.c_str();

				// The modulo folds the shifted ids of duplicated ports back
				// onto the bram's own CLK/CLKPOL/TRANSP numbering.
				if (pi.clocks && (!c->hasPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1)) || pi.sig_clock.wire)) {
					c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), pi.sig_clock);
					if (pi.clkpol > 1 && pi.sig_clock.wire)
						c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
					if (pi.transp > 1 && pi.sig_clock.wire)
						c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp));
				}

				// addr_ok: the address bits above bram.abits select this grid_a.
				SigSpec addr_ok;
				if (GetSize(pi.sig_addr) > bram.abits) {
					SigSpec extra_addr = pi.sig_addr.extract(bram.abits, GetSize(pi.sig_addr) - bram.abits);
					SigSpec extra_addr_sel = SigSpec(grid_a, GetSize(extra_addr));
					addr_ok = module->Eq(NEW_ID, extra_addr, extra_addr_sel);
				}

				if (pi.enable)
				{
					SigSpec sig_en = pi.sig_en;

					// Write ports: pick the enable bits belonging to this data slice.
					if (pi.wrmode == 1) {
						sig_en.extend_u0((grid_d+1) * pi.enable);
						sig_en = sig_en.extract(grid_d * pi.enable, pi.enable);
					}

					// Force the enable to zero when another grid_a is addressed.
					if (!addr_ok.empty())
						sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok);

					c->setPort(stringf("\\%sEN", pf), sig_en);

					if (pi.wrmode == 1 && enable_make_transp)
						module->connect(mktr_wren[grid_a], sig_en);
				}

				SigSpec sig_addr = pi.sig_addr;
				sig_addr.extend_u0(bram.abits);
				c->setPort(stringf("\\%sADDR", pf), sig_addr);

				if (pi.wrmode == 1 && enable_make_transp && grid_a == 0)
					module->connect(mktr_wraddr, sig_addr);

				SigSpec sig_data = pi.sig_data;
				sig_data.extend_u0((grid_d+1) * bram.dbits);
				sig_data = sig_data.extract(grid_d * bram.dbits, bram.dbits);

				if (pi.wrmode == 1) {
					c->setPort(stringf("\\%sDATA", pf), sig_data);
					if (enable_make_transp && grid_a == 0)
						module->connect(mktr_wrdata, sig_data);
				} else {
					SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits);
					c->setPort(stringf("\\%sDATA", pf), bram_dout);

					if (pi.make_transp) {
						log(" Adding extra logic for transparent port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);

						// Registered "write hits the read address" flag per
						// enable bit; when set, the registered write data
						// replaces the bram output for that enable's bits.
						SigSpec transp_en_d = module->Mux(NEW_ID, SigSpec(0, make_transp_enbits),
								mktr_wren[grid_a], module->Eq(NEW_ID, mktr_wraddr, sig_addr));

						SigSpec transp_en_q = module->addWire(NEW_ID, make_transp_enbits);
						module->addDff(NEW_ID, make_transp_clk.first, transp_en_d, transp_en_q, make_transp_clk.second);

						for (int i = 0; i < make_transp_enbits; i++) {
							int en_width = bram.dbits / make_transp_enbits;
							SigSpec orig_bram_dout = bram_dout.extract(i * en_width, en_width);
							SigSpec bypass_dout = mktr_wrdata_q.extract(i * en_width, en_width);
							bram_dout.replace(i * en_width, module->Mux(NEW_ID, orig_bram_dout, bypass_dout, transp_en_q[i]));
						}
					}

					// Drop bit positions whose destination is not a wire
					// (padding added by extend_u0 or the shuffle fill bits).
					for (int i = bram.dbits-1; i >= 0; i--)
						if (sig_data[i].wire == nullptr) {
							sig_data.remove(i);
							bram_dout.remove(i);
						}

					// For clocked ports the select bit is registered with the
					// port's clock, holding its value while the read enable is low.
					SigSpec addr_ok_q = addr_ok;
					if (pi.clocks && !addr_ok.empty()) {
						addr_ok_q = module->addWire(NEW_ID);
						if (!pi.sig_en.empty())
							addr_ok = module->Mux(NEW_ID, addr_ok_q, addr_ok, pi.sig_en);
						module->addDff(NEW_ID, pi.sig_clock, addr_ok, addr_ok_q, pi.effective_clkpol);
					}

					dout_cache[sig_data].first.append(addr_ok_q);
					dout_cache[sig_data].second.append(bram_dout);
				}
			}
		}
	}

	// Drive each read data signal: directly when there is no select bit,
	// otherwise through a $pmux over the candidate outputs (undefined default).
	for (auto &it : dout_cache)
	{
		if (it.second.first.empty())
		{
			log_assert(GetSize(it.first) == GetSize(it.second.second));
			module->connect(it.first, it.second.second);
		}
		else
		{
			log_assert(GetSize(it.first)*GetSize(it.second.first) == GetSize(it.second.second));
			module->addPmux(NEW_ID, SigSpec(State::Sx, GetSize(it.first)), it.second.second, it.second.first, it.first);
		}
	}

	mem.remove();
	return true;
}

void handle_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals)
{
	log("Processing %s.%s:\n", log_id(mem.module), log_id(mem.memid));

	mem.narrow();

	bool cell_init = !mem.inits.empty();

	dict match_properties;
	match_properties["words"] = mem.size;
	match_properties["abits"] = ceil_log2(mem.size);
	match_properties["dbits"] = mem.width;
	match_properties["wports"] = GetSize(mem.wr_ports);
	match_properties["rports"] = GetSize(mem.rd_ports);
	match_properties["bits"] = match_properties["words"] * match_properties["dbits"];
	match_properties["ports"] = match_properties["wports"] + match_properties["rports"];

	log(" Properties:");
	for (auto &it : match_properties)
		log(" 
%s=%d", it.first.c_str(), it.second); log("\n"); // This pass cannot deal with write port priority — we need to emulate it, // if present. Since priority emulation will change the enable signals, // which in turn may change enable grouping and mapping eligibility in // pathological cases, we need to do this before checking mapping // eligibility. This will create priority emulation logic for all // memories in the design regardless of whether we end up mapping them // or not, but since we never call Mem::emit(), the new priority masks // and enables won't be commited to the design, and this logic will be // unused (and removed by subsequent opt_clean) for unmapped memories. for (int i = 0; i < GetSize(mem.wr_ports); i++) for (int j = 0; j < i; j++) mem.emulate_priority(j, i); pool> failed_brams; dict, tuple> best_rule_cache; for (int i = 0; i < GetSize(rules.matches); i++) { auto &match = rules.matches.at(i); if (!rules.brams.count(rules.matches[i].name)) log_error("No bram description for resource %s found!\n", log_id(rules.matches[i].name)); for (int vi = 0; vi < GetSize(rules.brams.at(match.name)); vi++) { auto &bram = rules.brams.at(match.name).at(vi); bool or_next_if_better = match.or_next_if_better || vi+1 < GetSize(rules.brams.at(match.name)); int avail_rd_ports = 0; int avail_wr_ports = 0; for (int j = 0; j < bram.groups; j++) { if (GetSize(bram.wrmode) < j || bram.wrmode.at(j) == 0) avail_rd_ports += GetSize(bram.ports) < j ? bram.ports.at(j) : 0; if (GetSize(bram.wrmode) < j || bram.wrmode.at(j) != 0) avail_wr_ports += GetSize(bram.ports) < j ? bram.ports.at(j) : 0; } log(" Checking rule #%d for bram type %s (variant %d):\n", i+1, log_id(bram.name), bram.variant); log(" Bram geometry: abits=%d dbits=%d wports=%d rports=%d\n", bram.abits, bram.dbits, avail_wr_ports, avail_rd_ports); int dups = avail_rd_ports ? 
(match_properties["rports"] + avail_rd_ports - 1) / avail_rd_ports : 1; match_properties["dups"] = dups; log(" Estimated number of duplicates for more read ports: dups=%d\n", match_properties["dups"]); int aover = match_properties["words"] % (1 << bram.abits); int awaste = aover ? (1 << bram.abits) - aover : 0; match_properties["awaste"] = awaste; int dover = match_properties["dbits"] % bram.dbits; int dwaste = dover ? bram.dbits - dover : 0; match_properties["dwaste"] = dwaste; int bwaste = awaste * bram.dbits + dwaste * (1 << bram.abits) - awaste * dwaste; match_properties["bwaste"] = bwaste; int waste = match_properties["dups"] * bwaste; match_properties["waste"] = waste; int cells = ((match_properties["dbits"] + bram.dbits - 1) / bram.dbits) * ((match_properties["words"] + (1 << bram.abits) - 1) / (1 << bram.abits)); int efficiency = (100 * match_properties["bits"]) / (dups * cells * bram.dbits * (1 << bram.abits)); match_properties["efficiency"] = efficiency; if (failed_brams.count(pair(bram.name, bram.variant))) goto next_match_rule; log(" Metrics for %s: awaste=%d dwaste=%d bwaste=%d waste=%d efficiency=%d\n", log_id(match.name), awaste, dwaste, bwaste, waste, efficiency); if (cell_init && bram.init == 0) { log(" Rule #%d for bram type %s (variant %d) rejected: cannot be initialized.\n", i+1, log_id(bram.name), bram.variant); goto next_match_rule; } for (auto it : match.min_limits) { if (it.first == "waste" || it.first == "dups" || it.first == "acells" || it.first == "dcells" || it.first == "cells") continue; if (!match_properties.count(it.first)) log_error("Unknown property '%s' in match rule for bram type %s.\n", it.first.c_str(), log_id(match.name)); if (match_properties[it.first] >= it.second) continue; log(" Rule #%d for bram type %s (variant %d) rejected: requirement 'min %s %d' not met.\n", i+1, log_id(bram.name), bram.variant, it.first.c_str(), it.second); goto next_match_rule; } for (auto it : match.max_limits) { if (it.first == "acells" || it.first 
== "dcells" || it.first == "cells") continue; if (!match_properties.count(it.first)) log_error("Unknown property '%s' in match rule for bram type %s.\n", it.first.c_str(), log_id(match.name)); if (match_properties[it.first] <= it.second) continue; log(" Rule #%d for bram type %s (variant %d) rejected: requirement 'max %s %d' not met.\n", i+1, log_id(bram.name), bram.variant, it.first.c_str(), it.second); goto next_match_rule; } for (const auto &sums : match.attributes) { bool found = false; for (const auto &term : sums) { bool exists = std::get<0>(term); IdString key = std::get<1>(term); const Const &value = std::get<2>(term); auto it = mem.attributes.find(key); if (it == mem.attributes.end()) { if (exists) continue; found = true; break; } else if (!exists) continue; if (rules.map_case(it->second) != value) continue; found = true; break; } if (!found) { std::stringstream ss; bool exists = std::get<0>(sums.front()); if (!exists) ss << "!"; IdString key = std::get<1>(sums.front()); ss << log_id(key); const Const &value = rules.map_case(std::get<2>(sums.front())); if (exists && value != Const(1)) ss << "=\"" << value.decode_string() << "\""; log(" Rule for bram type %s (variant %d) rejected: requirement 'attribute %s ...' 
not met.\n", log_id(bram.name), bram.variant, ss.str().c_str()); goto next_match_rule; } } log(" Rule #%d for bram type %s (variant %d) accepted.\n", i+1, log_id(bram.name), bram.variant); if (or_next_if_better || !best_rule_cache.empty()) { if (or_next_if_better && i+1 == GetSize(rules.matches) && vi+1 == GetSize(rules.brams.at(match.name))) log_error("Found 'or_next_if_better' in last match rule.\n"); if (!replace_memory(mem, rules, initvals, bram, match, match_properties, 1)) { log(" Mapping to bram type %s failed.\n", log_id(match.name)); failed_brams.insert(pair(bram.name, bram.variant)); goto next_match_rule; } log(" Storing for later selection.\n"); best_rule_cache[pair(i, vi)] = tuple(match_properties["efficiency"], -match_properties["cells"], -match_properties["acells"]); next_match_rule: if (or_next_if_better || best_rule_cache.empty()) continue; log(" Selecting best of %d rules:\n", GetSize(best_rule_cache)); pair best_rule = best_rule_cache.begin()->first; for (auto &it : best_rule_cache) { if (it.second > best_rule_cache[best_rule]) best_rule = it.first; log(" Efficiency for rule %d.%d: efficiency=%d, cells=%d, acells=%d\n", it.first.first+1, it.first.second+1, std::get<0>(it.second), -std::get<1>(it.second), -std::get<2>(it.second)); } log(" Selected rule %d.%d with efficiency %d.\n", best_rule.first+1, best_rule.second+1, std::get<0>(best_rule_cache[best_rule])); best_rule_cache.clear(); auto &best_bram = rules.brams.at(rules.matches.at(best_rule.first).name).at(best_rule.second); if (!replace_memory(mem, rules, initvals, best_bram, rules.matches.at(best_rule.first), match_properties, 2)) log_error("Mapping to bram type %s (variant %d) after pre-selection failed.\n", log_id(best_bram.name), best_bram.variant); return; } if (!replace_memory(mem, rules, initvals, bram, match, match_properties, 0)) { log(" Mapping to bram type %s failed.\n", log_id(match.name)); failed_brams.insert(pair(bram.name, bram.variant)); goto next_match_rule; } return; } } 
log(" No acceptable bram resources found.\n"); } struct MemoryBramPass : public Pass { MemoryBramPass() : Pass("memory_bram", "map memories to block rams") { } void help() override { // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| log("\n"); log(" memory_bram -rules [selection]\n"); log("\n"); log("This pass converts the multi-port $mem memory cells into block ram instances.\n"); log("The given rules file describes the available resources and how they should be\n"); log("used.\n"); log("\n"); log("The rules file contains configuration options, a set of block ram description\n"); log("and a sequence of match rules.\n"); log("\n"); log("The option 'attr_icase' configures how attribute values are matched. The value 0\n"); log("means case-sensitive, 1 means case-insensitive.\n"); log("\n"); log("A block ram description looks like this:\n"); log("\n"); log(" bram RAMB1024X32 # name of BRAM cell\n"); log(" init 1 # set to '1' if BRAM can be initialized\n"); log(" abits 10 # number of address bits\n"); log(" dbits 32 # number of data bits\n"); log(" groups 2 # number of port groups\n"); log(" ports 1 1 # number of ports in each group\n"); log(" wrmode 1 0 # set to '1' if this groups is write ports\n"); log(" enable 4 1 # number of enable bits\n"); log(" transp 0 2 # transparent (for read ports)\n"); log(" clocks 1 2 # clock configuration\n"); log(" clkpol 2 2 # clock polarity configuration\n"); log(" endbram\n"); log("\n"); log("For the option 'transp' the value 0 means non-transparent, 1 means transparent\n"); log("and a value greater than 1 means configurable. All groups with the same\n"); log("value greater than 1 share the same configuration bit.\n"); log("\n"); log("For the option 'clocks' the value 0 means non-clocked, and a value greater\n"); log("than 0 means clocked. 
All groups with the same value share the same clock\n"); log("signal.\n"); log("\n"); log("For the option 'clkpol' the value 0 means negative edge, 1 means positive edge\n"); log("and a value greater than 1 means configurable. All groups with the same value\n"); log("greater than 1 share the same configuration bit.\n"); log("\n"); log("Using the same bram name in different bram blocks will create different variants\n"); log("of the bram. Verilog configuration parameters for the bram are created as needed.\n"); log("\n"); log("It is also possible to create variants by repeating statements in the bram block\n"); log("and appending '@