aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/patches-3.18/080-02-fib_trie-Update-usage-stats-to-be-percpu-instead-of-.patch
blob: 2e6deb5bbc366a22ce0a83ffebf0a2424d256ade (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 31 Dec 2014 10:55:29 -0800
Subject: [PATCH] fib_trie: Update usage stats to be percpu instead of
 global variables

The trie usage stats are currently shared by all threads that call
fib_table_lookup.  As a result, when multiple threads perform lookups
simultaneously, the trie begins to cache-bounce between those threads.

To prevent this, I have updated the usage stats to use a set of percpu
variables.  This should avoid the cache bouncing while still letting us
make use of these stats.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---

--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(st
 	return 0;
 
 fail:
-	kfree(local_table);
+	fib_free_table(local_table);
 	return -ENOMEM;
 }
 #else
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -153,7 +153,7 @@ struct trie_stat {
 struct trie {
 	struct rt_trie_node __rcu *trie;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-	struct trie_use_stats stats;
+	struct trie_use_stats __percpu *stats;
 #endif
 };
 
@@ -631,7 +631,7 @@ static struct rt_trie_node *resize(struc
 		if (IS_ERR(tn)) {
 			tn = old_tn;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-			t->stats.resize_node_skipped++;
+			this_cpu_inc(t->stats->resize_node_skipped);
 #endif
 			break;
 		}
@@ -658,7 +658,7 @@ static struct rt_trie_node *resize(struc
 		if (IS_ERR(tn)) {
 			tn = old_tn;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-			t->stats.resize_node_skipped++;
+			this_cpu_inc(t->stats->resize_node_skipped);
 #endif
 			break;
 		}
@@ -1357,7 +1357,7 @@ static int check_leaf(struct fib_table *
 			err = fib_props[fa->fa_type].error;
 			if (err) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-				t->stats.semantic_match_passed++;
+				this_cpu_inc(t->stats->semantic_match_passed);
 #endif
 				return err;
 			}
@@ -1372,7 +1372,7 @@ static int check_leaf(struct fib_table *
 					continue;
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-				t->stats.semantic_match_passed++;
+				this_cpu_inc(t->stats->semantic_match_passed);
 #endif
 				res->prefixlen = li->plen;
 				res->nh_sel = nhsel;
@@ -1388,7 +1388,7 @@ static int check_leaf(struct fib_table *
 		}
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-		t->stats.semantic_match_miss++;
+		this_cpu_inc(t->stats->semantic_match_miss);
 #endif
 	}
 
@@ -1399,6 +1399,9 @@ int fib_table_lookup(struct fib_table *t
 		     struct fib_result *res, int fib_flags)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+	struct trie_use_stats __percpu *stats = t->stats;
+#endif
 	int ret;
 	struct rt_trie_node *n;
 	struct tnode *pn;
@@ -1417,7 +1420,7 @@ int fib_table_lookup(struct fib_table *t
 		goto failed;
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-	t->stats.gets++;
+	this_cpu_inc(stats->gets);
 #endif
 
 	/* Just a leaf? */
@@ -1441,7 +1444,7 @@ int fib_table_lookup(struct fib_table *t
 
 		if (n == NULL) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-			t->stats.null_node_hit++;
+			this_cpu_inc(stats->null_node_hit);
 #endif
 			goto backtrace;
 		}
@@ -1576,7 +1579,7 @@ backtrace:
 			chopped_off = 0;
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-			t->stats.backtrack++;
+			this_cpu_inc(stats->backtrack);
 #endif
 			goto backtrace;
 		}
@@ -1830,6 +1833,11 @@ int fib_table_flush(struct fib_table *tb
 
 void fib_free_table(struct fib_table *tb)
 {
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+	struct trie *t = (struct trie *)tb->tb_data;
+
+	free_percpu(t->stats);
+#endif /* CONFIG_IP_FIB_TRIE_STATS */
 	kfree(tb);
 }
 
@@ -1973,7 +1981,14 @@ struct fib_table *fib_trie_table(u32 id)
 	tb->tb_num_default = 0;
 
 	t = (struct trie *) tb->tb_data;
-	memset(t, 0, sizeof(*t));
+	RCU_INIT_POINTER(t->trie, NULL);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+	t->stats = alloc_percpu(struct trie_use_stats);
+	if (!t->stats) {
+		kfree(tb);
+		tb = NULL;
+	}
+#endif
 
 	return tb;
 }
@@ -2139,18 +2154,31 @@ static void trie_show_stats(struct seq_f
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 static void trie_show_usage(struct seq_file *seq,
-			    const struct trie_use_stats *stats)
+			    const struct trie_use_stats __percpu *stats)
 {
+	struct trie_use_stats s = { 0 };
+	int cpu;
+
+	/* loop through all of the CPUs and gather up the stats */
+	for_each_possible_cpu(cpu) {
+		const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
+
+		s.gets += pcpu->gets;
+		s.backtrack += pcpu->backtrack;
+		s.semantic_match_passed += pcpu->semantic_match_passed;
+		s.semantic_match_miss += pcpu->semantic_match_miss;
+		s.null_node_hit += pcpu->null_node_hit;
+		s.resize_node_skipped += pcpu->resize_node_skipped;
+	}
+
 	seq_printf(seq, "\nCounters:\n---------\n");
-	seq_printf(seq, "gets = %u\n", stats->gets);
-	seq_printf(seq, "backtracks = %u\n", stats->backtrack);
+	seq_printf(seq, "gets = %u\n", s.gets);
+	seq_printf(seq, "backtracks = %u\n", s.backtrack);
 	seq_printf(seq, "semantic match passed = %u\n",
-		   stats->semantic_match_passed);
-	seq_printf(seq, "semantic match miss = %u\n",
-		   stats->semantic_match_miss);
-	seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
-	seq_printf(seq, "skipped node resize = %u\n\n",
-		   stats->resize_node_skipped);
+		   s.semantic_match_passed);
+	seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
+	seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
+	seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
 }
 #endif /*  CONFIG_IP_FIB_TRIE_STATS */
 
@@ -2191,7 +2219,7 @@ static int fib_triestat_seq_show(struct
 			trie_collect_stats(t, &stat);
 			trie_show_stats(seq, &stat);
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-			trie_show_usage(seq, &t->stats);
+			trie_show_usage(seq, t->stats);
 #endif
 		}
 	}
pan> /---------------------------------------------------------------------------*/ #define _VOLUMES 1 /* Number of volumes (logical drives) to be used. */ #define _STR_VOLUME_ID 0 /* 0:Use only 0-9 for drive ID, 1:Use strings for drive ID */ #define _VOLUME_STRS "RAM","NAND","CF","SD1","SD2","USB1","USB2","USB3" /* When _STR_VOLUME_ID is set to 1, also pre-defined strings can be used as drive / number in the path name. _VOLUME_STRS defines the drive ID strings for each logical / drives. Number of items must be equal to _VOLUMES. Valid characters for the drive ID / strings are: 0-9 and A-Z. */ #define _MULTI_PARTITION 0 /* 0:Single partition, 1:Enable multiple partition */ /* By default(0), each logical drive number is bound to the same physical drive number / and only a FAT volume found on the physical drive is mounted. When it is set to 1, / each logical drive number is bound to arbitrary drive/partition listed in VolToPart[]. */ #define _MIN_SS 512 #define _MAX_SS 512 /* These options configure the range of sector size to be supported. (512, 1024, 2048 or / 4096) Always set both 512 for most systems, all memory card and harddisk. But a larger / value may be required for on-board flash memory and some type of optical media. / When _MAX_SS is larger than _MIN_SS, FatFs is configured to variable sector size and / GET_SECTOR_SIZE command must be implemented to the disk_ioctl() function. */ #define _USE_ERASE 0 /* 0:Disable or 1:Enable */ /* To enable sector erase feature, set _USE_ERASE to 1. Also CTRL_ERASE_SECTOR command / should be added to the disk_ioctl() function. */ #define _FS_NOFSINFO 0 /* 0 to 3 */ /* If you need to know correct free space on the FAT32 volume, set bit 0 of this option / and f_getfree() function at first time after volume mount will force a full FAT scan. / Bit 1 controls the last allocated cluster number as bit 0. / / bit0=0: Use free cluster count in the FSINFO if available. / bit0=1: Do not trust free cluster count in the FSINFO. 
/ bit1=0: Use last allocated cluster number in the FSINFO if available. / bit1=1: Do not trust last allocated cluster number in the FSINFO. */ /*---------------------------------------------------------------------------/ / System Configurations /---------------------------------------------------------------------------*/ #define _FS_LOCK 0 /* 0:Disable or >=1:Enable */ /* To enable file lock control feature, set _FS_LOCK to non-zero value. / The value defines how many files/sub-directories can be opened simultaneously / with file lock control. This feature uses bss _FS_LOCK * 12 bytes. */ #define _FS_REENTRANT 0 /* 0:Disable or 1:Enable */ #define _FS_TIMEOUT 1000 /* Timeout period in unit of time tick */ #define _SYNC_t semaphore_t* /* O/S dependent sync object type. e.g. HANDLE, OS_EVENT*, ID, SemaphoreHandle_t and etc.. */ /* The _FS_REENTRANT option switches the re-entrancy (thread safe) of the FatFs module. / / 0: Disable re-entrancy. _FS_TIMEOUT and _SYNC_t have no effect. / 1: Enable re-entrancy. Also user provided synchronization handlers, / ff_req_grant(), ff_rel_grant(), ff_del_syncobj() and ff_cre_syncobj() / function must be added to the project. */ #define _WORD_ACCESS 0 /* 0 or 1 */ /* The _WORD_ACCESS option is an only platform dependent option. It defines / which access method is used to the word data on the FAT volume. / / 0: Byte-by-byte access. Always compatible with all platforms. / 1: Word access. Do not choose this unless under both the following conditions. / / * Address misaligned memory access is always allowed for ALL instructions. / * Byte order on the memory is little-endian. / / If it is the case, _WORD_ACCESS can also be set to 1 to improve performance and / reduce code size. Following table shows an example of some processor types. 
/ / ARM7TDMI 0 ColdFire 0 V850E2 0 / Cortex-M3 0 Z80 0/1 V850ES 0/1 / Cortex-M0 0 RX600(LE) 0/1 TLCS-870 0/1 / AVR 0/1 RX600(BE) 0 TLCS-900 0/1 / AVR32 0 RL78 0 R32C 0 / PIC18 0/1 SH-2 0 M16C 0/1 / PIC24 0 H8S 0 MSP430 0 / PIC32 0 H8/300H 0 x86 0/1 */ #endif /* _FFCONF */