1 files changed, 212 insertions, 56 deletions
diff --git a/ice40/tmfuzz.py b/ice40/tmfuzz.py
index caf3bc80..4ec2a546 100644
--- a/ice40/tmfuzz.py
+++ b/ice40/tmfuzz.py
@@ -1,12 +1,17 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # ../nextpnr-ice40 --hx8k --tmfuzz > tmfuzz_hx8k.txt
+# ../nextpnr-ice40 --lp8k --tmfuzz > tmfuzz_lp8k.txt
+# ../nextpnr-ice40 --up5k --tmfuzz > tmfuzz_up5k.txt
 
 import numpy as np
 import matplotlib.pyplot as plt
 from collections import defaultdict
 
 device = "hx8k"
+# device = "lp8k"
+# device = "up5k"
+
 sel_src_type = "LUTFF_OUT"
 sel_dst_type = "LUTFF_IN_LUT"
 
@@ -15,10 +20,17 @@ sel_dst_type = "LUTFF_IN_LUT"
 src_dst_pairs = defaultdict(lambda: 0)
 
 delay_data = list()
+all_delay_data = list()
+
 delay_map_sum = np.zeros((41, 41))
 delay_map_sum2 = np.zeros((41, 41))
 delay_map_count = np.zeros((41, 41))
 
+same_tile_delays = list()
+neighbour_tile_delays = list()
+
+type_delta_data = dict()
+
 with open("tmfuzz_%s.txt" % device, "r") as f:
     for line in f:
         line = line.split()
@@ -35,23 +47,52 @@ with open("tmfuzz_%s.txt" % device, "r") as f:
         delay = int(line[5])
         estdelay = int(line[6])
 
+        all_delay_data.append((delay, estdelay))
+
         src_dst_pairs[src_type, dst_type] += 1
 
+        dx = dst_xy[0] - src_xy[0]
+        dy = dst_xy[1] - src_xy[1]
+
         if src_type == sel_src_type and dst_type == sel_dst_type:
-            delay_data.append((delay, estdelay))
-            relx = 20 + dst_xy[0] - src_xy[0]
-            rely = 20 + dst_xy[1] - src_xy[1]
+            if dx == 0 and dy == 0:
+                same_tile_delays.append(delay)
+
+            elif abs(dx) <= 1 and abs(dy) <= 1:
+                neighbour_tile_delays.append(delay)
+
+            else:
+                delay_data.append((delay, estdelay, dx, dy, 0, 0, 0))
+
+                relx = 20 + dst_xy[0] - src_xy[0]
+                rely = 20 + dst_xy[1] - src_xy[1]
+
+                if (0 <= relx <= 40) and (0 <= rely <= 40):
+                    delay_map_sum[relx, rely] += delay
+                    delay_map_sum2[relx, rely] += delay*delay
+                    delay_map_count[relx, rely] += 1
+
+        if dst_type == sel_dst_type:
+            if src_type not in type_delta_data:
+                type_delta_data[src_type] = list()
 
-            if (0 <= relx <= 40) and (0 <= rely <= 40):
-                delay_map_sum[relx, rely] += delay
-                delay_map_sum2[relx, rely] += delay*delay
-                delay_map_count[relx, rely] += 1
+            type_delta_data[src_type].append((dx, dy, delay))
 
 delay_data = np.array(delay_data)
+all_delay_data = np.array(all_delay_data)
+max_delay = np.max(delay_data[:, 0:2])
+
+mean_same_tile_delays = np.mean(neighbour_tile_delays)
+mean_neighbour_tile_delays = np.mean(neighbour_tile_delays)
+
+print("Avg same tile delay: %.2f (%.2f std, N=%d)" % \
+        (mean_same_tile_delays, np.std(same_tile_delays), len(same_tile_delays)))
+print("Avg neighbour tile delay: %.2f (%.2f std, N=%d)" % \
+        (mean_neighbour_tile_delays, np.std(neighbour_tile_delays), len(neighbour_tile_delays)))
 
 #%% Apply simple low-weight bluring to fill gaps
 
-for i in range(1):
+for i in range(0):
     neigh_sum = np.zeros((41, 41))
     neigh_sum2 = np.zeros((41, 41))
     neigh_count = np.zeros((41, 41))
@@ -84,8 +125,14 @@ print()
 
 #%% Plot estimate vs actual delay
 
-plt.figure()
-plt.plot(delay_data[:,0], delay_data[:,1], ".")
+plt.figure(figsize=(8, 3))
+plt.title("Estimate vs Actual Delay")
+plt.plot(all_delay_data[:, 0], all_delay_data[:, 1], ".")
+plt.plot(delay_data[:, 0], delay_data[:, 1], ".")
+plt.plot([0, max_delay], [0, max_delay], "k")
+plt.ylabel("Estimated Delay")
+plt.xlabel("Actual Delay")
+plt.grid()
 plt.show()
 
 #%% Plot delay heatmap and std dev heatmap
@@ -101,7 +148,65 @@ plt.imshow(delay_map_std)
 plt.colorbar()
 plt.show()
 
-#%% Linear least-squares fits of delayEstimate models
+#%% Generate Model #0
+
+def nonlinearPreprocessor0(dx, dy):
+    dx, dy = abs(dx), abs(dy)
+    values = [1.0]
+    values.append(dx + dy)
+    return np.array(values)
+
+A = np.zeros((41*41, len(nonlinearPreprocessor0(0, 0))))
+b = np.zeros(41*41)
+
+index = 0
+for x in range(41):
+    for y in range(41):
+        if delay_map_count[x, y] > 0:
+            A[index, :] = nonlinearPreprocessor0(x-20, y-20)
+            b[index] = delay_map[x, y]
+        index += 1
+
+model0_params, _, _, _ = np.linalg.lstsq(A, b)
+print("Model #0 parameters:", model0_params)
+
+model0_map = np.zeros((41, 41))
+for x in range(41):
+    for y in range(41):
+        v = np.dot(model0_params, nonlinearPreprocessor0(x-20, y-20))
+        model0_map[x, y] = v
+
+plt.figure(figsize=(9, 3))
+plt.subplot(121)
+plt.title("Model #0 Delay Map")
+plt.imshow(model0_map)
+plt.colorbar()
+plt.subplot(122)
+plt.title("Model #0 Error Map")
+plt.imshow(model0_map - delay_map)
+plt.colorbar()
+plt.show()
+
+for i in range(delay_data.shape[0]):
+    dx = delay_data[i, 2]
+    dy = delay_data[i, 3]
+    delay_data[i, 4] =  np.dot(model0_params, nonlinearPreprocessor0(dx, dy))
+
+plt.figure(figsize=(8, 3))
+plt.title("Model #0 vs Actual Delay")
+plt.plot(delay_data[:, 0], delay_data[:, 4], ".")
+plt.plot(delay_map.flat, model0_map.flat, ".")
+plt.plot([0, max_delay], [0, max_delay], "k")
+plt.ylabel("Model #0 Delay")
+plt.xlabel("Actual Delay")
+plt.grid()
+plt.show()
+
+print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model0_map)**2)))
+print("Out-of-sample RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 4])**2)))
+print()
+
+#%% Generate Model #1
 
 def nonlinearPreprocessor1(dx, dy):
     dx, dy = abs(dx), abs(dy)
@@ -117,8 +222,9 @@ b = np.zeros(41*41)
 index = 0
 for x in range(41):
     for y in range(41):
-        A[index, :] = nonlinearPreprocessor1(x-20, y-20)
-        b[index] = delay_map[x, y]
+        if delay_map_count[x, y] > 0:
+            A[index, :] = nonlinearPreprocessor1(x-20, y-20)
+            b[index] = delay_map[x, y]
         index += 1
 
 model1_params, _, _, _ = np.linalg.lstsq(A, b)
@@ -141,61 +247,111 @@ plt.imshow(model1_map - delay_map)
 plt.colorbar()
 plt.show()
 
+for i in range(delay_data.shape[0]):
+    dx = delay_data[i, 2]
+    dy = delay_data[i, 3]
+    delay_data[i, 5] = np.dot(model1_params, nonlinearPreprocessor1(dx, dy))
+
 plt.figure(figsize=(8, 3))
-plt.title("Model #1  vs Actual Delay")
+plt.title("Model #1 vs Actual Delay")
+plt.plot(delay_data[:, 0], delay_data[:, 5], ".")
 plt.plot(delay_map.flat, model1_map.flat, ".")
-plt.plot([0, 4000], [0, 4000], "k")
+plt.plot([0, max_delay], [0, max_delay], "k")
 plt.ylabel("Model #1 Delay")
 plt.xlabel("Actual Delay")
 plt.grid()
 plt.show()
 
-print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model1_map)**2)))
+print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model1_map)**2)))
+print("Out-of-sample RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 5])**2)))
 print()
 
-if True:
-    def nonlinearPreprocessor2(v):
-        return np.array([1, v, np.sqrt(v)])
+#%% Generate Model #2
 
-    A = np.zeros((41*41, len(nonlinearPreprocessor2(0))))
-    b = np.zeros(41*41)
+def nonlinearPreprocessor2(v):
+    return np.array([1, v, np.sqrt(v)])
 
-    index = 0
-    for x in range(41):
-        for y in range(41):
+A = np.zeros((41*41, len(nonlinearPreprocessor2(0))))
+b = np.zeros(41*41)
+
+index = 0
+for x in range(41):
+    for y in range(41):
+        if delay_map_count[x, y] > 0:
             A[index, :] = nonlinearPreprocessor2(model1_map[x, y])
             b[index] = delay_map[x, y]
-            index += 1
+        index += 1
 
-    model2_params, _, _, _ = np.linalg.lstsq(A, b)
-    print("Model #2 parameters:", model2_params)
+model2_params, _, _, _ = np.linalg.lstsq(A, b)
+print("Model #2 parameters:", model2_params)
 
-    model2_map = np.zeros((41, 41))
-    for x in range(41):
-        for y in range(41):
-            v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20))
-            v = np.dot(model2_params, nonlinearPreprocessor2(v))
-            model2_map[x, y] = v
-
-    plt.figure(figsize=(9, 3))
-    plt.subplot(121)
-    plt.title("Model #2 Delay Map")
-    plt.imshow(model2_map)
-    plt.colorbar()
-    plt.subplot(122)
-    plt.title("Model #2 Error Map")
-    plt.imshow(model2_map - delay_map)
-    plt.colorbar()
-    plt.show()
-
-    plt.figure(figsize=(8, 3))
-    plt.title("Model #2 vs Actual Delay")
-    plt.plot(delay_map.flat, model2_map.flat, ".")
-    plt.plot([0, 4000], [0, 4000], "k")
-    plt.ylabel("Model #2 Delay")
-    plt.xlabel("Actual Delay")
-    plt.grid()
-    plt.show()
-
-    print("Total RMS error: %f" % np.sqrt(np.mean((delay_map - model2_map)**2)))
-    print()
+model2_map = np.zeros((41, 41))
+for x in range(41):
+    for y in range(41):
+        v = np.dot(model1_params, nonlinearPreprocessor1(x-20, y-20))
+        v = np.dot(model2_params, nonlinearPreprocessor2(v))
+        model2_map[x, y] = v
+
+plt.figure(figsize=(9, 3))
+plt.subplot(121)
+plt.title("Model #2 Delay Map")
+plt.imshow(model2_map)
+plt.colorbar()
+plt.subplot(122)
+plt.title("Model #2 Error Map")
+plt.imshow(model2_map - delay_map)
+plt.colorbar()
+plt.show()
+
+for i in range(delay_data.shape[0]):
+    dx = delay_data[i, 2]
+    dy = delay_data[i, 3]
+    delay_data[i, 6] = np.dot(model2_params, nonlinearPreprocessor2(delay_data[i, 5]))
+
+plt.figure(figsize=(8, 3))
+plt.title("Model #2 vs Actual Delay")
+plt.plot(delay_data[:, 0], delay_data[:, 6], ".")
+plt.plot(delay_map.flat, model2_map.flat, ".")
+plt.plot([0, max_delay], [0, max_delay], "k")
+plt.ylabel("Model #2 Delay")
+plt.xlabel("Actual Delay")
+plt.grid()
+plt.show()
+
+print("In-sample RMS error: %f" % np.sqrt(np.nanmean((delay_map - model2_map)**2)))
+print("Out-of-sample RMS error: %f" % np.sqrt(np.nanmean((delay_data[:, 0] - delay_data[:, 6])**2)))
+print()
+
+#%% Generate deltas for different source net types
+
+type_deltas = dict()
+
+print("Delay deltas for different src types:")
+for src_type in sorted(type_delta_data.keys()):
+    deltas = list()
+
+    for dx, dy, delay in type_delta_data[src_type]:
+        dx = abs(dx)
+        dy = abs(dy)
+
+        if dx > 1 or dy > 1:
+            est = model0_params[0] + model0_params[1] * (dx + dy)
+        else:
+            est = mean_neighbour_tile_delays
+        deltas.append(delay - est)
+
+    print("%15s: %8.2f (std %6.2f)" % (\
+            src_type, np.mean(deltas), np.std(deltas)))
+
+    type_deltas[src_type] = np.mean(deltas)
+
+#%% Print C defs of model parameters
+
+print("--snip--")
+print("%d, %d, %d," % (mean_neighbour_tile_delays, 128 * model0_params[0], 128 * model0_params[1]))
+print("%d, %d, %d, %d," % (128 * model1_params[0], 128 * model1_params[1], 128 * model1_params[2], 128 * model1_params[3]))
+print("%d, %d, %d," % (128 * model2_params[0], 128 * model2_params[1], 128 * model2_params[2]))
+print("%d, %d, %d, %d" % (type_deltas["LOCAL"], type_deltas["LUTFF_IN"], \
+                          (type_deltas["SP4_H"] + type_deltas["SP4_V"]) / 2,
+                          (type_deltas["SP12_H"] + type_deltas["SP12_V"]) / 2))
+print("--snap--")