1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
|
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 31 Dec 2014 10:56:49 -0800
Subject: [PATCH] fib_trie: Push tnode flushing down to inflate/halve
This change pushes the tnode freeing down into the inflate and halve
functions. It makes more sense here as we have a better grasp of what is
going on and when a given cluster of nodes is ready to be freed.
I believe this may address a bug in the freeing logic as well. For some
reason if the freelist got to a certain size we would call
synchronize_rcu(). I'm assuming that what they meant to do is call
synchronize_rcu() after they had handed off that much memory via
call_rcu(). As such that is what I have updated the behavior to be.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -147,8 +147,6 @@ struct trie {
};
static void resize(struct trie *t, struct tnode *tn);
-/* tnodes to free after resize(); protected by RTNL */
-static struct callback_head *tnode_free_head;
static size_t tnode_free_size;
/*
@@ -307,32 +305,6 @@ static struct tnode *tnode_alloc(size_t
return vzalloc(size);
}
-static void tnode_free_safe(struct tnode *tn)
-{
- BUG_ON(IS_LEAF(tn));
- tn->rcu.next = tnode_free_head;
- tnode_free_head = &tn->rcu;
-}
-
-static void tnode_free_flush(void)
-{
- struct callback_head *head;
-
- while ((head = tnode_free_head)) {
- struct tnode *tn = container_of(head, struct tnode, rcu);
-
- tnode_free_head = head->next;
- tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
-
- node_free(tn);
- }
-
- if (tnode_free_size >= PAGE_SIZE * sync_pages) {
- tnode_free_size = 0;
- synchronize_rcu();
- }
-}
-
static struct tnode *leaf_new(t_key key)
{
struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
@@ -433,17 +405,33 @@ static void put_child_root(struct tnode
rcu_assign_pointer(t->trie, n);
}
-static void tnode_clean_free(struct tnode *tn)
+static inline void tnode_free_init(struct tnode *tn)
{
- struct tnode *tofree;
- unsigned long i;
+ tn->rcu.next = NULL;
+}
+
+static inline void tnode_free_append(struct tnode *tn, struct tnode *n)
+{
+ n->rcu.next = tn->rcu.next;
+ tn->rcu.next = &n->rcu;
+}
- for (i = 0; i < tnode_child_length(tn); i++) {
- tofree = tnode_get_child(tn, i);
- if (tofree)
- node_free(tofree);
+static void tnode_free(struct tnode *tn)
+{
+ struct callback_head *head = &tn->rcu;
+
+ while (head) {
+ head = head->next;
+ tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
+ node_free(tn);
+
+ tn = container_of(head, struct tnode, rcu);
+ }
+
+ if (tnode_free_size >= PAGE_SIZE * sync_pages) {
+ tnode_free_size = 0;
+ synchronize_rcu();
}
- node_free(tn);
}
static int inflate(struct trie *t, struct tnode *oldtnode)
@@ -476,20 +464,23 @@ static int inflate(struct trie *t, struc
inode->bits - 1);
if (!left)
goto nomem;
+ tnode_free_append(tn, left);
right = tnode_new(inode->key | m, inode->pos,
inode->bits - 1);
- if (!right) {
- node_free(left);
+ if (!right)
goto nomem;
- }
+ tnode_free_append(tn, right);
put_child(tn, 2*i, left);
put_child(tn, 2*i+1, right);
}
}
+ /* prepare oldtnode to be freed */
+ tnode_free_init(oldtnode);
+
for (i = 0; i < olen; i++) {
struct tnode *inode = tnode_get_child(oldtnode, i);
struct tnode *left, *right;
@@ -505,12 +496,13 @@ static int inflate(struct trie *t, struc
continue;
}
+ /* drop the node in the old tnode free list */
+ tnode_free_append(oldtnode, inode);
+
/* An internal node with two children */
if (inode->bits == 1) {
put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
-
- tnode_free_safe(inode);
continue;
}
@@ -556,17 +548,19 @@ static int inflate(struct trie *t, struc
put_child(tn, 2 * i, left);
put_child(tn, 2 * i + 1, right);
- tnode_free_safe(inode);
-
+ /* resize child nodes */
resize(t, left);
resize(t, right);
}
put_child_root(tp, t, tn->key, tn);
- tnode_free_safe(oldtnode);
+
+ /* we completed without error, prepare to free old node */
+ tnode_free(oldtnode);
return 0;
nomem:
- tnode_clean_free(tn);
+ /* all pointers should be clean so we are done */
+ tnode_free(tn);
return -ENOMEM;
}
@@ -599,17 +593,20 @@ static int halve(struct trie *t, struct
struct tnode *newn;
newn = tnode_new(left->key, oldtnode->pos, 1);
-
if (!newn) {
- tnode_clean_free(tn);
+ tnode_free(tn);
return -ENOMEM;
}
+ tnode_free_append(tn, newn);
put_child(tn, i/2, newn);
}
}
+ /* prepare oldtnode to be freed */
+ tnode_free_init(oldtnode);
+
for (i = 0; i < olen; i += 2) {
struct tnode *newBinNode;
@@ -636,11 +633,14 @@ static int halve(struct trie *t, struct
put_child(tn, i / 2, newBinNode);
+ /* resize child node */
resize(t, newBinNode);
}
put_child_root(tp, t, tn->key, tn);
- tnode_free_safe(oldtnode);
+
+ /* all pointers should be clean so we are done */
+ tnode_free(oldtnode);
return 0;
}
@@ -798,7 +798,8 @@ no_children:
node_set_parent(n, tp);
/* drop dead node */
- tnode_free_safe(tn);
+ tnode_free_init(tn);
+ tnode_free(tn);
}
}
@@ -884,16 +885,12 @@ static void trie_rebalance(struct trie *
while ((tp = node_parent(tn)) != NULL) {
resize(t, tn);
-
- tnode_free_flush();
tn = tp;
}
/* Handle last (top) tnode */
if (IS_TNODE(tn))
resize(t, tn);
-
- tnode_free_flush();
}
/* only used from updater-side */
|