aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-11-19 12:49:59 -0500
committer: David S. Miller <davem@davemloft.net> 2010-11-19 12:49:59 -0500
commit: 93aaae2e01e57483256b7da05c9a7ebd65ad4686 (patch)
tree: b3e4117bbf39814ef58ce1d012d977d2d5393c38
parent: 0a80410dc53cf68e56456bef1ca66949b87412f9 (diff)
filter: optimize sk_run_filter
Remove the pc variable to avoid the arithmetic needed to compute fentry at each filter instruction. Jumps now manipulate the fentry pointer directly.

As the last instruction of filter[] is guaranteed to be a RETURN, and all jumps land before the last instruction, we don't need to check filter bounds (the number of instructions in the filter array) at each iteration, so we remove it from the sk_run_filter() params.

On x86_32, remove the f_k var introduced in commit 57fe93b374a6b871 (filter: make sure filters dont read uninitialized memory).

Note: We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to avoid too many ifdefs in this code.

This helps the compiler use cpu registers to hold fentry and the A accumulator.

On x86_32, this saves 401 bytes and, more importantly, sk_run_filter() runs much faster because of lower register pressure (one less conditional branch per BPF instruction).

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   2948       0       0    2948     b84 net/core/filter.o
   3349       0       0    3349     d15 net/core/filter_pre.o

On x86_64:

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   5173       0       0    5173    1435 net/core/filter.o
   5224       0       0    5224    1468 net/core/filter_pre.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/isdn/i4l/isdn_ppp.c  14
-rw-r--r--  drivers/net/ppp_generic.c    12
-rw-r--r--  include/linux/filter.h        2
-rw-r--r--  net/core/filter.c            93
-rw-r--r--  net/core/timestamping.c       2
-rw-r--r--  net/packet/af_packet.c        2
6 files changed, 61 insertions, 64 deletions
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 97c5cc2997f5..9e8162c80bb0 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
1147 } 1147 }
1148 1148
1149 if (is->pass_filter 1149 if (is->pass_filter
1150 && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) { 1150 && sk_run_filter(skb, is->pass_filter) == 0) {
1151 if (is->debug & 0x2) 1151 if (is->debug & 0x2)
1152 printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); 1152 printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
1153 kfree_skb(skb); 1153 kfree_skb(skb);
1154 return; 1154 return;
1155 } 1155 }
1156 if (!(is->active_filter 1156 if (!(is->active_filter
1157 && sk_run_filter(skb, is->active_filter, 1157 && sk_run_filter(skb, is->active_filter) == 0)) {
1158 is->active_len) == 0)) {
1159 if (is->debug & 0x2) 1158 if (is->debug & 0x2)
1160 printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); 1159 printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
1161 lp->huptimer = 0; 1160 lp->huptimer = 0;
@@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
1294 } 1293 }
1295 1294
1296 if (ipt->pass_filter 1295 if (ipt->pass_filter
1297 && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) { 1296 && sk_run_filter(skb, ipt->pass_filter) == 0) {
1298 if (ipt->debug & 0x4) 1297 if (ipt->debug & 0x4)
1299 printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); 1298 printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
1300 kfree_skb(skb); 1299 kfree_skb(skb);
1301 goto unlock; 1300 goto unlock;
1302 } 1301 }
1303 if (!(ipt->active_filter 1302 if (!(ipt->active_filter
1304 && sk_run_filter(skb, ipt->active_filter, 1303 && sk_run_filter(skb, ipt->active_filter) == 0)) {
1305 ipt->active_len) == 0)) {
1306 if (ipt->debug & 0x4) 1304 if (ipt->debug & 0x4)
1307 printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); 1305 printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
1308 lp->huptimer = 0; 1306 lp->huptimer = 0;
@@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
1492 } 1490 }
1493 1491
1494 drop |= is->pass_filter 1492 drop |= is->pass_filter
1495 && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0; 1493 && sk_run_filter(skb, is->pass_filter) == 0;
1496 drop |= is->active_filter 1494 drop |= is->active_filter
1497 && sk_run_filter(skb, is->active_filter, is->active_len) == 0; 1495 && sk_run_filter(skb, is->active_filter) == 0;
1498 1496
1499 skb_push(skb, IPPP_MAX_HEADER - 4); 1497 skb_push(skb, IPPP_MAX_HEADER - 4);
1500 return drop; 1498 return drop;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 09cf56d0416a..0c91598ae280 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
1136 a four-byte PPP header on each packet */ 1136 a four-byte PPP header on each packet */
1137 *skb_push(skb, 2) = 1; 1137 *skb_push(skb, 2) = 1;
1138 if (ppp->pass_filter && 1138 if (ppp->pass_filter &&
1139 sk_run_filter(skb, ppp->pass_filter, 1139 sk_run_filter(skb, ppp->pass_filter) == 0) {
1140 ppp->pass_len) == 0) {
1141 if (ppp->debug & 1) 1140 if (ppp->debug & 1)
1142 printk(KERN_DEBUG "PPP: outbound frame not passed\n"); 1141 printk(KERN_DEBUG "PPP: outbound frame not passed\n");
1143 kfree_skb(skb); 1142 kfree_skb(skb);
@@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
1145 } 1144 }
1146 /* if this packet passes the active filter, record the time */ 1145 /* if this packet passes the active filter, record the time */
1147 if (!(ppp->active_filter && 1146 if (!(ppp->active_filter &&
1148 sk_run_filter(skb, ppp->active_filter, 1147 sk_run_filter(skb, ppp->active_filter) == 0))
1149 ppp->active_len) == 0))
1150 ppp->last_xmit = jiffies; 1148 ppp->last_xmit = jiffies;
1151 skb_pull(skb, 2); 1149 skb_pull(skb, 2);
1152#else 1150#else
@@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
1758 1756
1759 *skb_push(skb, 2) = 0; 1757 *skb_push(skb, 2) = 0;
1760 if (ppp->pass_filter && 1758 if (ppp->pass_filter &&
1761 sk_run_filter(skb, ppp->pass_filter, 1759 sk_run_filter(skb, ppp->pass_filter) == 0) {
1762 ppp->pass_len) == 0) {
1763 if (ppp->debug & 1) 1760 if (ppp->debug & 1)
1764 printk(KERN_DEBUG "PPP: inbound frame " 1761 printk(KERN_DEBUG "PPP: inbound frame "
1765 "not passed\n"); 1762 "not passed\n");
@@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
1767 return; 1764 return;
1768 } 1765 }
1769 if (!(ppp->active_filter && 1766 if (!(ppp->active_filter &&
1770 sk_run_filter(skb, ppp->active_filter, 1767 sk_run_filter(skb, ppp->active_filter) == 0))
1771 ppp->active_len) == 0))
1772 ppp->last_recv = jiffies; 1768 ppp->last_recv = jiffies;
1773 __skb_pull(skb, 2); 1769 __skb_pull(skb, 2);
1774 } else 1770 } else
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d703b7e..447a775878fb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
147 147
148extern int sk_filter(struct sock *sk, struct sk_buff *skb); 148extern int sk_filter(struct sock *sk, struct sk_buff *skb);
149extern unsigned int sk_run_filter(struct sk_buff *skb, 149extern unsigned int sk_run_filter(struct sk_buff *skb,
150 struct sock_filter *filter, int flen); 150 const struct sock_filter *filter);
151extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 151extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
152extern int sk_detach_filter(struct sock *sk); 152extern int sk_detach_filter(struct sock *sk);
153extern int sk_chk_filter(struct sock_filter *filter, int flen); 153extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d39cd3..9e77b3c816c5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
137 rcu_read_lock_bh(); 137 rcu_read_lock_bh();
138 filter = rcu_dereference_bh(sk->sk_filter); 138 filter = rcu_dereference_bh(sk->sk_filter);
139 if (filter) { 139 if (filter) {
140 unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); 140 unsigned int pkt_len = sk_run_filter(skb, filter->insns);
141 141
142 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 142 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
143 } 143 }
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
151 * sk_run_filter - run a filter on a socket 151 * sk_run_filter - run a filter on a socket
152 * @skb: buffer to run the filter on 152 * @skb: buffer to run the filter on
153 * @filter: filter to apply 153 * @filter: filter to apply
154 * @flen: length of filter
155 * 154 *
156 * Decode and apply filter instructions to the skb->data. 155 * Decode and apply filter instructions to the skb->data.
157 * Return length to keep, 0 for none. skb is the data we are 156 * Return length to keep, 0 for none. @skb is the data we are
158 * filtering, filter is the array of filter instructions, and 157 * filtering, @filter is the array of filter instructions.
159 * len is the number of filter blocks in the array. 158 * Because all jumps are guaranteed to be before last instruction,
159 * and last instruction guaranteed to be a RET, we dont need to check
160 * flen. (We used to pass to this function the length of filter)
160 */ 161 */
161unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 162unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
162{ 163{
163 void *ptr; 164 void *ptr;
164 u32 A = 0; /* Accumulator */ 165 u32 A = 0; /* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
167 unsigned long memvalid = 0; 168 unsigned long memvalid = 0;
168 u32 tmp; 169 u32 tmp;
169 int k; 170 int k;
170 int pc;
171 171
172 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); 172 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
173 /* 173 /*
174 * Process array of filter instructions. 174 * Process array of filter instructions.
175 */ 175 */
176 for (pc = 0; pc < flen; pc++) { 176 for (;; fentry++) {
177 const struct sock_filter *fentry = &filter[pc]; 177#if defined(CONFIG_X86_32)
178 u32 f_k = fentry->k; 178#define K (fentry->k)
179#else
180 const u32 K = fentry->k;
181#endif
179 182
180 switch (fentry->code) { 183 switch (fentry->code) {
181 case BPF_S_ALU_ADD_X: 184 case BPF_S_ALU_ADD_X:
182 A += X; 185 A += X;
183 continue; 186 continue;
184 case BPF_S_ALU_ADD_K: 187 case BPF_S_ALU_ADD_K:
185 A += f_k; 188 A += K;
186 continue; 189 continue;
187 case BPF_S_ALU_SUB_X: 190 case BPF_S_ALU_SUB_X:
188 A -= X; 191 A -= X;
189 continue; 192 continue;
190 case BPF_S_ALU_SUB_K: 193 case BPF_S_ALU_SUB_K:
191 A -= f_k; 194 A -= K;
192 continue; 195 continue;
193 case BPF_S_ALU_MUL_X: 196 case BPF_S_ALU_MUL_X:
194 A *= X; 197 A *= X;
195 continue; 198 continue;
196 case BPF_S_ALU_MUL_K: 199 case BPF_S_ALU_MUL_K:
197 A *= f_k; 200 A *= K;
198 continue; 201 continue;
199 case BPF_S_ALU_DIV_X: 202 case BPF_S_ALU_DIV_X:
200 if (X == 0) 203 if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
202 A /= X; 205 A /= X;
203 continue; 206 continue;
204 case BPF_S_ALU_DIV_K: 207 case BPF_S_ALU_DIV_K:
205 A /= f_k; 208 A /= K;
206 continue; 209 continue;
207 case BPF_S_ALU_AND_X: 210 case BPF_S_ALU_AND_X:
208 A &= X; 211 A &= X;
209 continue; 212 continue;
210 case BPF_S_ALU_AND_K: 213 case BPF_S_ALU_AND_K:
211 A &= f_k; 214 A &= K;
212 continue; 215 continue;
213 case BPF_S_ALU_OR_X: 216 case BPF_S_ALU_OR_X:
214 A |= X; 217 A |= X;
215 continue; 218 continue;
216 case BPF_S_ALU_OR_K: 219 case BPF_S_ALU_OR_K:
217 A |= f_k; 220 A |= K;
218 continue; 221 continue;
219 case BPF_S_ALU_LSH_X: 222 case BPF_S_ALU_LSH_X:
220 A <<= X; 223 A <<= X;
221 continue; 224 continue;
222 case BPF_S_ALU_LSH_K: 225 case BPF_S_ALU_LSH_K:
223 A <<= f_k; 226 A <<= K;
224 continue; 227 continue;
225 case BPF_S_ALU_RSH_X: 228 case BPF_S_ALU_RSH_X:
226 A >>= X; 229 A >>= X;
227 continue; 230 continue;
228 case BPF_S_ALU_RSH_K: 231 case BPF_S_ALU_RSH_K:
229 A >>= f_k; 232 A >>= K;
230 continue; 233 continue;
231 case BPF_S_ALU_NEG: 234 case BPF_S_ALU_NEG:
232 A = -A; 235 A = -A;
233 continue; 236 continue;
234 case BPF_S_JMP_JA: 237 case BPF_S_JMP_JA:
235 pc += f_k; 238 fentry += K;
236 continue; 239 continue;
237 case BPF_S_JMP_JGT_K: 240 case BPF_S_JMP_JGT_K:
238 pc += (A > f_k) ? fentry->jt : fentry->jf; 241 fentry += (A > K) ? fentry->jt : fentry->jf;
239 continue; 242 continue;
240 case BPF_S_JMP_JGE_K: 243 case BPF_S_JMP_JGE_K:
241 pc += (A >= f_k) ? fentry->jt : fentry->jf; 244 fentry += (A >= K) ? fentry->jt : fentry->jf;
242 continue; 245 continue;
243 case BPF_S_JMP_JEQ_K: 246 case BPF_S_JMP_JEQ_K:
244 pc += (A == f_k) ? fentry->jt : fentry->jf; 247 fentry += (A == K) ? fentry->jt : fentry->jf;
245 continue; 248 continue;
246 case BPF_S_JMP_JSET_K: 249 case BPF_S_JMP_JSET_K:
247 pc += (A & f_k) ? fentry->jt : fentry->jf; 250 fentry += (A & K) ? fentry->jt : fentry->jf;
248 continue; 251 continue;
249 case BPF_S_JMP_JGT_X: 252 case BPF_S_JMP_JGT_X:
250 pc += (A > X) ? fentry->jt : fentry->jf; 253 fentry += (A > X) ? fentry->jt : fentry->jf;
251 continue; 254 continue;
252 case BPF_S_JMP_JGE_X: 255 case BPF_S_JMP_JGE_X:
253 pc += (A >= X) ? fentry->jt : fentry->jf; 256 fentry += (A >= X) ? fentry->jt : fentry->jf;
254 continue; 257 continue;
255 case BPF_S_JMP_JEQ_X: 258 case BPF_S_JMP_JEQ_X:
256 pc += (A == X) ? fentry->jt : fentry->jf; 259 fentry += (A == X) ? fentry->jt : fentry->jf;
257 continue; 260 continue;
258 case BPF_S_JMP_JSET_X: 261 case BPF_S_JMP_JSET_X:
259 pc += (A & X) ? fentry->jt : fentry->jf; 262 fentry += (A & X) ? fentry->jt : fentry->jf;
260 continue; 263 continue;
261 case BPF_S_LD_W_ABS: 264 case BPF_S_LD_W_ABS:
262 k = f_k; 265 k = K;
263load_w: 266load_w:
264 ptr = load_pointer(skb, k, 4, &tmp); 267 ptr = load_pointer(skb, k, 4, &tmp);
265 if (ptr != NULL) { 268 if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
268 } 271 }
269 break; 272 break;
270 case BPF_S_LD_H_ABS: 273 case BPF_S_LD_H_ABS:
271 k = f_k; 274 k = K;
272load_h: 275load_h:
273 ptr = load_pointer(skb, k, 2, &tmp); 276 ptr = load_pointer(skb, k, 2, &tmp);
274 if (ptr != NULL) { 277 if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
277 } 280 }
278 break; 281 break;
279 case BPF_S_LD_B_ABS: 282 case BPF_S_LD_B_ABS:
280 k = f_k; 283 k = K;
281load_b: 284load_b:
282 ptr = load_pointer(skb, k, 1, &tmp); 285 ptr = load_pointer(skb, k, 1, &tmp);
283 if (ptr != NULL) { 286 if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
292 X = skb->len; 295 X = skb->len;
293 continue; 296 continue;
294 case BPF_S_LD_W_IND: 297 case BPF_S_LD_W_IND:
295 k = X + f_k; 298 k = X + K;
296 goto load_w; 299 goto load_w;
297 case BPF_S_LD_H_IND: 300 case BPF_S_LD_H_IND:
298 k = X + f_k; 301 k = X + K;
299 goto load_h; 302 goto load_h;
300 case BPF_S_LD_B_IND: 303 case BPF_S_LD_B_IND:
301 k = X + f_k; 304 k = X + K;
302 goto load_b; 305 goto load_b;
303 case BPF_S_LDX_B_MSH: 306 case BPF_S_LDX_B_MSH:
304 ptr = load_pointer(skb, f_k, 1, &tmp); 307 ptr = load_pointer(skb, K, 1, &tmp);
305 if (ptr != NULL) { 308 if (ptr != NULL) {
306 X = (*(u8 *)ptr & 0xf) << 2; 309 X = (*(u8 *)ptr & 0xf) << 2;
307 continue; 310 continue;
308 } 311 }
309 return 0; 312 return 0;
310 case BPF_S_LD_IMM: 313 case BPF_S_LD_IMM:
311 A = f_k; 314 A = K;
312 continue; 315 continue;
313 case BPF_S_LDX_IMM: 316 case BPF_S_LDX_IMM:
314 X = f_k; 317 X = K;
315 continue; 318 continue;
316 case BPF_S_LD_MEM: 319 case BPF_S_LD_MEM:
317 A = (memvalid & (1UL << f_k)) ? 320 A = (memvalid & (1UL << K)) ?
318 mem[f_k] : 0; 321 mem[K] : 0;
319 continue; 322 continue;
320 case BPF_S_LDX_MEM: 323 case BPF_S_LDX_MEM:
321 X = (memvalid & (1UL << f_k)) ? 324 X = (memvalid & (1UL << K)) ?
322 mem[f_k] : 0; 325 mem[K] : 0;
323 continue; 326 continue;
324 case BPF_S_MISC_TAX: 327 case BPF_S_MISC_TAX:
325 X = A; 328 X = A;
@@ -328,16 +331,16 @@ load_b:
328 A = X; 331 A = X;
329 continue; 332 continue;
330 case BPF_S_RET_K: 333 case BPF_S_RET_K:
331 return f_k; 334 return K;
332 case BPF_S_RET_A: 335 case BPF_S_RET_A:
333 return A; 336 return A;
334 case BPF_S_ST: 337 case BPF_S_ST:
335 memvalid |= 1UL << f_k; 338 memvalid |= 1UL << K;
336 mem[f_k] = A; 339 mem[K] = A;
337 continue; 340 continue;
338 case BPF_S_STX: 341 case BPF_S_STX:
339 memvalid |= 1UL << f_k; 342 memvalid |= 1UL << K;
340 mem[f_k] = X; 343 mem[K] = X;
341 continue; 344 continue;
342 default: 345 default:
343 WARN_ON(1); 346 WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..dac7ed687f60 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
31 if (likely(skb->dev && 31 if (likely(skb->dev &&
32 skb->dev->phydev && 32 skb->dev->phydev &&
33 skb->dev->phydev->drv)) 33 skb->dev->phydev->drv))
34 return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); 34 return sk_run_filter(skb, ptp_filter);
35 else 35 else
36 return PTP_CLASS_NONE; 36 return PTP_CLASS_NONE;
37} 37}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 20964560a0ed..b6372dd128d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
519 rcu_read_lock_bh(); 519 rcu_read_lock_bh();
520 filter = rcu_dereference_bh(sk->sk_filter); 520 filter = rcu_dereference_bh(sk->sk_filter);
521 if (filter != NULL) 521 if (filter != NULL)
522 res = sk_run_filter(skb, filter->insns, filter->len); 522 res = sk_run_filter(skb, filter->insns);
523 rcu_read_unlock_bh(); 523 rcu_read_unlock_bh();
524 524
525 return res; 525 return res;