aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2015-05-09 16:51:32 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-11 11:10:35 -0400
commitd2788d34885d4ce5ba17a8996fd95d28942e574e (patch)
tree8d57eceb0329c9f00fd1b00cc85ae497658219a6
parentc9e99fd078ef7fdcd9ee4f5a4cfdbece319587af (diff)
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify() that executes attached classifier(s) and action(s). It has a 1:1 relationship to dev->ingress_queue. After having commit 087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed the central ingress lock, one major contention point is gone. The extra indirection layers however, are not necessary for calling into ingress qdisc. pktgen calling locally into netif_receive_skb() with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps. We can redirect the private classifier list to the netdev directly, without changing any classifier API bits (!) and execute on that from handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed, ingress qdisc doesn't have a queue and thus dev_deactivate_queue() is also not applicable, ingress_cl_list provides similar behaviour. In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc. One next possible step is the removal of the dev's ingress (dummy) netdev_queue, and to only have the list member in the netdevice itself. Note, the filter chain is RCU protected and individual filter elements are being kfree'd by sched subsystem after RCU grace period. RCU read lock is being held by __netif_receive_skb_core(). Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--net/core/dev.c30
-rw-r--r--net/sched/sch_ingress.c58
3 files changed, 31 insertions(+), 61 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1899c74a7127..c4e1caf6056f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1655,7 +1655,11 @@ struct net_device {
1655 rx_handler_func_t __rcu *rx_handler; 1655 rx_handler_func_t __rcu *rx_handler;
1656 void __rcu *rx_handler_data; 1656 void __rcu *rx_handler_data;
1657 1657
1658#ifdef CONFIG_NET_CLS_ACT
1659 struct tcf_proto __rcu *ingress_cl_list;
1660#endif
1658 struct netdev_queue __rcu *ingress_queue; 1661 struct netdev_queue __rcu *ingress_queue;
1662
1659 unsigned char broadcast[MAX_ADDR_LEN]; 1663 unsigned char broadcast[MAX_ADDR_LEN];
1660#ifdef CONFIG_RFS_ACCEL 1664#ifdef CONFIG_RFS_ACCEL
1661 struct cpu_rmap *rx_cpu_rmap; 1665 struct cpu_rmap *rx_cpu_rmap;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8a757464bfa2..e5f77c40bbd1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3525,31 +3525,37 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3525 struct packet_type **pt_prev, 3525 struct packet_type **pt_prev,
3526 int *ret, struct net_device *orig_dev) 3526 int *ret, struct net_device *orig_dev)
3527{ 3527{
3528 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3528 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
3529 struct Qdisc *q; 3529 struct tcf_result cl_res;
3530 3530
3531 /* If there's at least one ingress present somewhere (so 3531 /* If there's at least one ingress present somewhere (so
3532 * we get here via enabled static key), remaining devices 3532 * we get here via enabled static key), remaining devices
3533 * that are not configured with an ingress qdisc will bail 3533 * that are not configured with an ingress qdisc will bail
3534 * out w/o the rcu_dereference(). 3534 * out here.
3535 */ 3535 */
3536 if (!rxq || (q = rcu_dereference(rxq->qdisc)) == &noop_qdisc) 3536 if (!cl)
3537 return skb; 3537 return skb;
3538
3539 if (*pt_prev) { 3538 if (*pt_prev) {
3540 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3539 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3541 *pt_prev = NULL; 3540 *pt_prev = NULL;
3542 } 3541 }
3543 3542
3543 qdisc_bstats_update_cpu(cl->q, skb);
3544 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 3544 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3545 3545
3546 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 3546 switch (tc_classify(skb, cl, &cl_res)) {
3547 switch (qdisc_enqueue_root(skb, q)) { 3547 case TC_ACT_OK:
3548 case TC_ACT_SHOT: 3548 case TC_ACT_RECLASSIFY:
3549 case TC_ACT_STOLEN: 3549 skb->tc_index = TC_H_MIN(cl_res.classid);
3550 kfree_skb(skb); 3550 break;
3551 return NULL; 3551 case TC_ACT_SHOT:
3552 } 3552 qdisc_qstats_drop_cpu(cl->q);
3553 case TC_ACT_STOLEN:
3554 case TC_ACT_QUEUED:
3555 kfree_skb(skb);
3556 return NULL;
3557 default:
3558 break;
3553 } 3559 }
3554 3560
3555 return skb; 3561 return skb;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index a89cc3278bfb..e7c648fa9dc3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <linux/rtnetlink.h> 14#include <linux/rtnetlink.h>
15
15#include <net/netlink.h> 16#include <net/netlink.h>
16#include <net/pkt_sched.h> 17#include <net/pkt_sched.h>
17 18
18
19struct ingress_qdisc_data {
20 struct tcf_proto __rcu *filter_list;
21};
22
23/* ------------------------- Class/flow operations ------------------------- */
24
25static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 19static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
26{ 20{
27 return NULL; 21 return NULL;
@@ -49,45 +43,11 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
49static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, 43static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
50 unsigned long cl) 44 unsigned long cl)
51{ 45{
52 struct ingress_qdisc_data *p = qdisc_priv(sch); 46 struct net_device *dev = qdisc_dev(sch);
53
54 return &p->filter_list;
55}
56
57/* --------------------------- Qdisc operations ---------------------------- */
58 47
59static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) 48 return &dev->ingress_cl_list;
60{
61 struct ingress_qdisc_data *p = qdisc_priv(sch);
62 struct tcf_result res;
63 struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
64 int result;
65
66 result = tc_classify(skb, fl, &res);
67
68 qdisc_bstats_update_cpu(sch, skb);
69 switch (result) {
70 case TC_ACT_SHOT:
71 result = TC_ACT_SHOT;
72 qdisc_qstats_drop_cpu(sch);
73 break;
74 case TC_ACT_STOLEN:
75 case TC_ACT_QUEUED:
76 result = TC_ACT_STOLEN;
77 break;
78 case TC_ACT_RECLASSIFY:
79 case TC_ACT_OK:
80 skb->tc_index = TC_H_MIN(res.classid);
81 default:
82 result = TC_ACT_OK;
83 break;
84 }
85
86 return result;
87} 49}
88 50
89/* ------------------------------------------------------------- */
90
91static int ingress_init(struct Qdisc *sch, struct nlattr *opt) 51static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
92{ 52{
93 net_inc_ingress_queue(); 53 net_inc_ingress_queue();
@@ -98,9 +58,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
98 58
99static void ingress_destroy(struct Qdisc *sch) 59static void ingress_destroy(struct Qdisc *sch)
100{ 60{
101 struct ingress_qdisc_data *p = qdisc_priv(sch); 61 struct net_device *dev = qdisc_dev(sch);
102 62
103 tcf_destroy_chain(&p->filter_list); 63 tcf_destroy_chain(&dev->ingress_cl_list);
104 net_dec_ingress_queue(); 64 net_dec_ingress_queue();
105} 65}
106 66
@@ -111,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
111 nest = nla_nest_start(skb, TCA_OPTIONS); 71 nest = nla_nest_start(skb, TCA_OPTIONS);
112 if (nest == NULL) 72 if (nest == NULL)
113 goto nla_put_failure; 73 goto nla_put_failure;
74
114 return nla_nest_end(skb, nest); 75 return nla_nest_end(skb, nest);
115 76
116nla_put_failure: 77nla_put_failure:
@@ -131,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
131static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { 92static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
132 .cl_ops = &ingress_class_ops, 93 .cl_ops = &ingress_class_ops,
133 .id = "ingress", 94 .id = "ingress",
134 .priv_size = sizeof(struct ingress_qdisc_data),
135 .enqueue = ingress_enqueue,
136 .init = ingress_init, 95 .init = ingress_init,
137 .destroy = ingress_destroy, 96 .destroy = ingress_destroy,
138 .dump = ingress_dump, 97 .dump = ingress_dump,
@@ -149,6 +108,7 @@ static void __exit ingress_module_exit(void)
149 unregister_qdisc(&ingress_qdisc_ops); 108 unregister_qdisc(&ingress_qdisc_ops);
150} 109}
151 110
152module_init(ingress_module_init) 111module_init(ingress_module_init);
153module_exit(ingress_module_exit) 112module_exit(ingress_module_exit);
113
154MODULE_LICENSE("GPL"); 114MODULE_LICENSE("GPL");