From f6b3333a7d34c865e0e371b5585e1ee8151a58b4 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Sat, 4 Aug 2018 19:50:49 +0200 Subject: Add new iCE40 delay estimator and delay predictor Signed-off-by: Clifford Wolf --- ice40/arch.cc | 58 ------------ ice40/arch.h | 3 +- ice40/chipdb.py | 2 +- ice40/delay.cc | 133 +++++++++++++++++++++++++++- ice40/tmfuzz.py | 268 ++++++++++++++++++++++++++++++++++++++++++++------------ 5 files changed, 343 insertions(+), 121 deletions(-) (limited to 'ice40') diff --git a/ice40/arch.cc b/ice40/arch.cc index 3b9a6992..de752b59 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -639,64 +639,6 @@ std::vector Arch::getGroupGroups(GroupId group) const // ----------------------------------------------------------------------- -delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const -{ - const auto &driver = net_info->driver; - auto driver_loc = getBelLocation(driver.cell->bel); - auto sink_loc = getBelLocation(sink.cell->bel); - - if (driver.port == id_cout) { - if (driver_loc.y == sink_loc.y) - return 0; - return 250; - } - -#if 1 - int xd = sink_loc.x - driver_loc.x, yd = sink_loc.y - driver_loc.y; - int xscale = 120, yscale = 120, offset = 0; - - // if (chip_info->wire_data[src.index].type == WIRE_TYPE_SP4_VERT) { - // yd = yd < -4 ? yd + 4 : (yd < 0 ? 
0 : yd); - // offset = 500; - // } - - if (driver.port == id_o) - offset += 330; - if (sink.port == id_i0 || sink.port == id_i1 || sink.port == id_i2 || sink.port == id_i3) - offset += 260; - - return xscale * abs(xd) + yscale * abs(yd) + offset; -#else - float model1_param_offset = 902.1066988; - float model1_param_norm1 = 169.80428447; - float model1_param_norm2 = -503.28635487; - float model1_param_norm3 = 402.96583807; - - float model2_param_offset = -1.09578873e+03; - float model2_param_linear = 5.01094876e-01; - float model2_param_sqrt = 4.71761281e+01; - - float dx = fabsf(sink_loc.x - driver_loc.x); - float dy = fabsf(sink_loc.y - driver_loc.y); - float norm1 = dx + dy; - - float dx2 = dx * dx; - float dy2 = dy * dy; - float norm2 = sqrtf(dx2 + dy2); - - float dx3 = dx2 * dx; - float dy3 = dy2 * dy; - float norm3 = powf(dx3 + dy3, 1.0/3.0); - - float v = model1_param_offset; - v += model1_param_norm1 * norm1; - v += model1_param_norm2 * norm2; - v += model1_param_norm3 * norm3; - - return model2_param_offset + model2_param_linear * v + model2_param_sqrt * sqrtf(v); -#endif -} - delay_t Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay_t budget) const { const auto &driver = net_info->driver; diff --git a/ice40/arch.h b/ice40/arch.h index 324915eb..ff52c1a5 100644 --- a/ice40/arch.h +++ b/ice40/arch.h @@ -120,9 +120,8 @@ NPNR_PACKED_STRUCT(struct WireInfoPOD { int32_t fast_delay; int32_t slow_delay; - int8_t x, y; + int8_t x, y, z; WireType type; - int8_t padding_0; }); NPNR_PACKED_STRUCT(struct PackagePinPOD { diff --git a/ice40/chipdb.py b/ice40/chipdb.py index 7c60a336..b0d9e567 100644 --- a/ice40/chipdb.py +++ b/ice40/chipdb.py @@ -1123,8 +1123,8 @@ for wire, info in enumerate(wireinfo): bba.u8(info["x"], "x") bba.u8(info["y"], "y") + bba.u8(0, "z") # FIXME bba.u8(wiretypes[wire_type(info["name"])], "type") - bba.u8(0, "padding") for wire in range(num_wires): if len(wire_segments[wire]): diff --git a/ice40/delay.cc 
b/ice40/delay.cc index d63af5d1..342b7f0b 100644 --- a/ice40/delay.cc +++ b/ice40/delay.cc @@ -23,7 +23,7 @@ NEXTPNR_NAMESPACE_BEGIN -#define NUM_FUZZ_ROUTES 100000 +#define NUM_FUZZ_ROUTES 1000 void ice40DelayFuzzerMain(Context *ctx) { @@ -101,20 +101,145 @@ void ice40DelayFuzzerMain(Context *ctx) } } +namespace { + +struct model_params_t { + int neighbourhood; + + int model0_offset; + int model0_norm1; + + int model1_offset; + int model1_norm1; + int model1_norm2; + int model1_norm3; + + int model2_offset; + int model2_linear; + int model2_sqrt; + + int delta_local; + int delta_lutffin; + int delta_sp4; + int delta_sp12; + + static const model_params_t &get(ArchArgs args) + { + static const model_params_t model_hx8k = { + 588, 129253, 8658, + 118333, 23915, -73105, 57696, + -86797, 89, 3706, + -316, -575, -158, -296 + }; + + static const model_params_t model_lp8k = { + 867, 206236, 11043, + 191910, 31074, -95972, 75739, + -309793, 30, 11056, + -474, -856, -363, -536 + }; + + static const model_params_t model_up5k = { + 1761, 305798, 16705, + 296830, 24430, -40369, 33038, + -162662, 94, 4705, + -1099, -1761, -418, -838 + }; + + if (args.type == ArchArgs::HX1K || args.type == ArchArgs::HX8K) + return model_hx8k; + + if (args.type == ArchArgs::LP384 || args.type == ArchArgs::LP1K || args.type == ArchArgs::LP8K) + return model_lp8k; + + if (args.type == ArchArgs::UP5K) + return model_up5k; + + NPNR_ASSERT(0); + } +}; + +} // namespace + delay_t Arch::estimateDelay(WireId src, WireId dst) const { NPNR_ASSERT(src != WireId()); int x1 = chip_info->wire_data[src.index].x; int y1 = chip_info->wire_data[src.index].y; + int z1 = chip_info->wire_data[src.index].z; + int type = chip_info->wire_data[src.index].type; NPNR_ASSERT(dst != WireId()); int x2 = chip_info->wire_data[dst.index].x; int y2 = chip_info->wire_data[dst.index].y; + int z2 = chip_info->wire_data[dst.index].z; + + int dx = abs(x2 - x1); + int dy = abs(y2 - y1); + + const model_params_t &p = 
model_params_t::get(args); + delay_t v = p.neighbourhood; + + if (dx > 1 || dy > 1) + v = (p.model0_offset + p.model0_norm1 * (dx + dy)) / 128; + + if (type == WireInfoPOD::WIRE_TYPE_LOCAL) + v += p.delta_local; + + if (type == WireInfoPOD::WIRE_TYPE_LUTFF_IN || type == WireInfoPOD::WIRE_TYPE_LUTFF_IN_LUT) + v += (z1 == z2) ? p.delta_lutffin : 1000; + + if (type == WireInfoPOD::WIRE_TYPE_SP4_V || type == WireInfoPOD::WIRE_TYPE_SP4_H) + v += p.delta_sp4; + + if (type == WireInfoPOD::WIRE_TYPE_SP12_V || type == WireInfoPOD::WIRE_TYPE_SP12_H) + v += p.delta_sp12; + + return v; +} + +delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const +{ + const auto &driver = net_info->driver; + auto driver_loc = getBelLocation(driver.cell->bel); + auto sink_loc = getBelLocation(sink.cell->bel); + + if (driver.port == id_cout) { + if (driver_loc.y == sink_loc.y) + return 0; + return 250; + } + + int dx = abs(sink_loc.x - driver_loc.x); + int dy = abs(sink_loc.y - driver_loc.y); + + const model_params_t &p = model_params_t::get(args); + + if (dx <= 1 && dy <= 1) + return p.neighbourhood; + + float norm1 = dx + dy; + + float dx2 = dx * dx; + float dy2 = dy * dy; + float norm2 = sqrtf(dx2 + dy2); + + float dx3 = dx2 * dx; + float dy3 = dy2 * dy; + float norm3 = powf(dx3 + dy3, 1.0/3.0); + + // Model #1 + float v = p.model1_offset; + v += p.model1_norm1 * norm1; + v += p.model1_norm2 * norm2; + v += p.model1_norm3 * norm3; + v /= 128; - int xd = x2 - x1, yd = y2 - y1; - int xscale = 120, yscale = 120, offset = 0; + // Model #2 + v = p.model2_offset + p.model2_linear * v + p.model2_sqrt * sqrtf(v); + v /= 128; - return xscale * abs(xd) + yscale * abs(yd) + offset; + return v; } NEXTPNR_NAMESPACE_END diff --git a/ice40/tmfuzz.py b/ice40/tmfuzz.py index caf3bc80..4ec2a546 100644 --- a/ice40/tmfuzz.py +++ b/ice40/tmfuzz.py @@ -1,12 +1,17 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # ../nextpnr-ice40 --hx8k --tmfuzz > tmfuzz_hx8k.txt +# ../nextpnr-ice40 --lp8k 
--tmfuzz > tmfuzz_lp8k.txt +# ../nextpnr-ice40 --up5k --tmfuzz > tmfuzz_up5k.txt import numpy as np import matplotlib.pyplot as plt from collections import defaultdict device = "hx8k" +# device = "lp8k" +# device = "up5k" + sel_src_type = "LUTFF_OUT" sel_dst_type = "LUTFF_IN_LUT" @@ -15,10 +20,17 @@ sel_dst_type = "LUTFF_IN_LUT" src_dst_pairs = defaultdict(lambda: 0) delay_data = list() +all_delay_data = list() + delay_map_sum = np.zeros((41, 41)) delay_map_sum2 = np.zeros((41, 41)) delay_map_count = np.zeros((41, 41)) +same_tile_delays = list() +neighbour_tile_delays = list() + +type_delta_data = dict() + with open("tmfuzz_%s.txt" % device, "r") as f: for line in f: line = line.split() @@ -35,23 +47,52 @@ with open("tmfuzz_%s.txt" % device, "r") as f: delay = int(line[5]) estdelay = int(line[6]) + all_delay_data.append((delay, estdelay)) + src_dst_pairs[src_type, dst_type] += 1 + dx = dst_xy[0] - src_xy[0] + dy = dst_xy[1] - src_xy[1] + if src_type == sel_src_type and dst_type == sel_dst_type: - delay_data.append((delay, estdelay)) - relx = 20 + dst_xy[0] - src_xy[0] - rely = 20 + dst_xy[1] - src_xy[1] + if dx == 0 and dy == 0: + same_tile_delays.append(delay) + + elif abs(dx) <= 1 and abs(dy) <= 1: + neighbour_tile_delays.append(delay) + + else: + delay_data.append((delay, estdelay, dx, dy, 0, 0, 0)) + + relx = 20 + dst_xy[0] - src_xy[0] + rely = 20 + dst_xy[1] - src_xy[1] + + if (0 <= relx <= 40) and (0 <= rely <= 40): + delay_map_sum[relx, rely] += delay + delay_map_sum2[relx, rely] += delay*delay + delay_map_count[relx, rely] += 1 + + if dst_type == sel_dst_type: + if src_type not in type_delta_data: + type_delta_data[src_type] = list() - if (0 <= relx <= 40) and (0 <= rely <= 40): - delay_map_sum[relx, rely] += delay - delay_map_sum2[relx, rely] += delay*delay - delay_map_count[relx, rely] += 1 + type_delta_data[src_type].append((dx, dy, delay)) delay_data = np.array(delay_data) +all_delay_data = np.array(all_delay_data) +max_delay = np.max(delay_data[:, 
0:2]) + +mean_same_tile_delays = np.mean(same_tile_delays) +mean_neighbour_tile_delays = np.mean(neighbour_tile_delays) + +print("Avg same tile delay: %.2f (%.2f std, N=%d)" % \ + (mean_same_tile_delays, np.std(same_tile_delays), len(same_tile_delays))) +print("Avg neighbour tile delay: %.2f (%.2f std, N=%d)" % \ + (mean_neighbour_tile_delays, np.std(neighbour_tile_delays), len(neighbour_tile_delays))) #%% Apply simple low-weight bluring to fill gaps -for i in range(1): +for i in range(0): neigh_sum = np.zeros((41, 41)) neigh_sum2 = np.zeros((41, 41)) neigh_count = np.zeros((41, 41)) @@ -84,8 +125,14 @@ print() #%% Plot estimate vs actual delay -plt.figure() -plt.plot(delay_data[:,0], delay_data[:,1], ".") +plt.figure(figsize=(8, 3)) +plt.title("Estimate vs Actual Delay") +plt.plot(all_delay_data[:, 0], all_delay_data[:, 1], ".") +plt.plot(delay_data[:, 0], delay_data[:, 1], ".") +plt.plot([0, max_delay], [0, max_delay], "k") +plt.ylabel("Estimated Delay") +plt.xlabel("Actual Delay") +plt.grid() plt.show() #%% Plot delay heatmap and std dev heatmap @@ -101,7 +148,65 @@ plt.imshow(delay_map_std) plt.colorbar() plt.show() -#%% Linear least-squares fits of delayEstimate models +#%% Generate Model #0 + +def nonlinearPreprocessor0(dx, dy): + dx, dy = abs(dx), abs(dy) + values = [1.0] + values.append(dx + dy) + return np.array(values) + +A = np.zeros((41*41, len(nonlinearPreprocessor0(0, 0)))) +b = np.zeros(41*41) + +index = 0 +for x in range(41): + for y in range(41): + if delay_map_count[x, y] > 0: + A[index, :] = nonlinearPreprocessor0(x-20, y-20) + b[index] = delay_map[x, y] + index += 1 + +model0_params, _, _, _ = np.linalg.lstsq(A, b) +print("Model #0 parameters:", model0_params) + +model0_map = np.zeros((41, 41)) +for x in range(41): + for y in range(41): + v = np.dot(model0_params, nonlinearPreprocessor0(x-20, y-20)) + model0_map[x, y] = v + +plt.figure(figsize=(9, 3)) +plt.subplot(121) +plt.title("Model #0 Delay Map") +plt.imshow(model0_map) +plt.colorbar() 
+plt.subplot(122) +plt.title("Model #0 Error Map") +plt.imshow(model0_map - delay_map) +plt.colorbar() +plt.show() + +for i in range(delay_data.shape[0]): + dx = delay_data[i, 2] + dy = delay_data[i, 3] + delay_data[i, 4] = np.dot(model0_params, nonlinearPreprocessor0(dx, dy)) + +plt.figure(figsize=(8, 3)) +plt.title("Model #0 vs Actual Delay") +plt.plot(delay_data[:, 0], delay_data[:, 4], ".") +plt.plot(delay_map.flat, model0_map.flat, ".") +plt.plot([0, max_delay], [0, max_delay], "k") +plt.ylabel("Model #0 Delay") +plt.xlabel("Actual Delay") +plt.grid() +plt.show() + +print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model0_map)**2))) +print("Out-of-sample RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 4])**2))) +print() + +#%% Generate Model #1 def nonlinearPreprocessor1(dx, dy): dx, dy = abs(dx), abs(dy) @@ -117,8 +222,9 @@ b = np.zeros(41*41) index = 0 for x in range(41): for y in range(41): - A[index, :] = nonlinearPreprocessor1(x-20, y-20) - b[index] = delay_map[x, y] + if delay_map_count[x, y] > 0: + A[index, :] = nonlinearPreprocessor1(x-20, y-20) + b[index] = delay_map[x, y] index += 1 model1_params, _, _, _ = np.linalg.lstsq(A, b) @@ -141,61 +247,111 @@ plt.imshow(model1_map - delay_map) plt.colorbar() plt.show() +for i in range(delay_data.shape[0]): + dx = delay_data[i, 2] + dy = delay_data[i, 3] + delay_data[i, 5] = np.dot(model1_params, nonlinearPreprocessor1(dx, dy)) + plt.figure(figsize=(8, 3)) -plt.title("Model #1 vs Actual Delay") +plt.title("Model #1 vs Actual Delay") +plt.plot(delay_data[:, 0], delay_data[:, 5], ".") plt.plot(delay_map.flat, model1_map.flat, ".") -plt.plot([0, 4000], [0, 4000], "k") +plt.plot([0, max_delay], [0, max_delay], "k") plt.ylabel("Model #1 Delay") plt.xlabel("Actual Delay") plt.grid() plt.show() -print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model1_map)**2))) +print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model1_map)**2))) +print("Out-of-sample 
RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 5])**2))) print() -if True: - def nonlinearPreprocessor2(v): - return np.array([1, v, np.sqrt(v)]) +#%% Generate Model #2 - A = np.zeros((41*41, len(nonlinearPreprocessor2(0)))) - b = np.zeros(41*41) +def nonlinearPreprocessor2(v): + return np.array([1, v, np.sqrt(v)]) - index = 0 - for x in range(41): - for y in range(41): +A = np.zeros((41*41, len(nonlinearPreprocessor2(0)))) +b = np.zeros(41*41) + +index = 0 +for x in range(41): + for y in range(41): + if delay_map_count[x, y] > 0: A[index, :] = nonlinearPreprocessor2(model1_map[x, y]) b[index] = delay_map[x, y] - index += 1 + index += 1 - model2_params, _, _, _ = np.linalg.lstsq(A, b) - print("Model #2 parameters:", model2_params) +model2_params, _, _, _ = np.linalg.lstsq(A, b) +print("Model #2 parameters:", model2_params) - model2_map = np.zeros((41, 41)) - for x in range(41): - for y in range(41): - v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20)) - v = np.dot(model2_params, nonlinearPreprocessor2(v)) - model2_map[x, y] = v - - plt.figure(figsize=(9, 3)) - plt.subplot(121) - plt.title("Model #2 Delay Map") - plt.imshow(model2_map) - plt.colorbar() - plt.subplot(122) - plt.title("Model #2 Error Map") - plt.imshow(model2_map - delay_map) - plt.colorbar() - plt.show() - - plt.figure(figsize=(8, 3)) - plt.title("Model #2 vs Actual Delay") - plt.plot(delay_map.flat, model2_map.flat, ".") - plt.plot([0, 4000], [0, 4000], "k") - plt.ylabel("Model #2 Delay") - plt.xlabel("Actual Delay") - plt.grid() - plt.show() - - print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model2_map)**2))) - print() +model2_map = np.zeros((41, 41)) +for x in range(41): + for y in range(41): + v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20)) + v = np.dot(model2_params, nonlinearPreprocessor2(v)) + model2_map[x, y] = v + +plt.figure(figsize=(9, 3)) +plt.subplot(121) +plt.title("Model #2 Delay Map") +plt.imshow(model2_map) +plt.colorbar() 
+plt.subplot(122) +plt.title("Model #2 Error Map") +plt.imshow(model2_map - delay_map) +plt.colorbar() +plt.show() + +for i in range(delay_data.shape[0]): + dx = delay_data[i, 2] + dy = delay_data[i, 3] + delay_data[i, 6] = np.dot(model2_params, nonlinearPreprocessor2(delay_data[i, 5])) + +plt.figure(figsize=(8, 3)) +plt.title("Model #2 vs Actual Delay") +plt.plot(delay_data[:, 0], delay_data[:, 6], ".") +plt.plot(delay_map.flat, model2_map.flat, ".") +plt.plot([0, max_delay], [0, max_delay], "k") +plt.ylabel("Model #2 Delay") +plt.xlabel("Actual Delay") +plt.grid() +plt.show() + +print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model2_map)**2))) +print("Out-of-sample RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 6])**2))) +print() + +#%% Generate deltas for different source net types + +type_deltas = dict() + +print("Delay deltas for different src types:") +for src_type in sorted(type_delta_data.keys()): + deltas = list() + + for dx, dy, delay in type_delta_data[src_type]: + dx = abs(dx) + dy = abs(dy) + + if dx > 1 or dy > 1: + est = model0_params[0] + model0_params[1] * (dx + dy) + else: + est = mean_neighbour_tile_delays + deltas.append(delay - est) + + print("%15s: %8.2f (std %6.2f)" % (\ + src_type, np.mean(deltas), np.std(deltas))) + + type_deltas[src_type] = np.mean(deltas) + +#%% Print C defs of model parameters + +print("--snip--") +print("%d, %d, %d," % (mean_neighbour_tile_delays, 128 * model0_params[0], 128 * model0_params[1])) +print("%d, %d, %d, %d," % (128 * model1_params[0], 128 * model1_params[1], 128 * model1_params[2], 128 * model1_params[3])) +print("%d, %d, %d," % (128 * model2_params[0], 128 * model2_params[1], 128 * model2_params[2])) +print("%d, %d, %d, %d" % (type_deltas["LOCAL"], type_deltas["LUTFF_IN"], \ + (type_deltas["SP4_H"] + type_deltas["SP4_V"]) / 2, + (type_deltas["SP12_H"] + type_deltas["SP12_V"]) / 2)) +print("--snap--") -- cgit v1.2.3