aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2010-05-24 03:12:34 -0400
committerDavid S. Miller <davem@davemloft.net>2010-05-24 03:12:34 -0400
commitf845172531fb7410c7fb7780b1a6e51ee6df7d52 (patch)
treeef1030d0ad9d9dbc8fe800a145c587f04be50ade /net/sched
parenteda6e6f86b5f95b982ac7ebf7cf5be2a29a291e9 (diff)
cls_cgroup: Store classid in struct sock
Up until now cls_cgroup has relied on fetching the classid out of the current executing thread. This runs into trouble when a packet processing is delayed in which case it may execute out of another thread's context. Furthermore, even when a packet is not delayed we may fail to classify it if soft IRQs have been disabled, because this scenario is indistinguishable from one where a packet unrelated to the current thread is processed by a real soft IRQ. In fact, the current semantics is inherently broken, as a single skb may be constructed out of the writes of two different tasks. A different manifestation of this problem is when the TCP stack transmits in response of an incoming ACK. This is currently unclassified. As we already have a concept of packet ownership for accounting purposes in the skb->sk pointer, this is a natural place to store the classid in a persistent manner. This patch adds the cls_cgroup classid in struct sock, filling up an existing hole on 64-bit :) The value is set at socket creation time. So all sockets created via socket(2) automatically gains the ID of the thread creating it. Whenever another process touches the socket by either reading or writing to it, we will change the socket classid to that of the process if it has a valid (non-zero) classid. For sockets created on inbound connections through accept(2), we inherit the classid of the original listening socket through sk_clone, possibly preceding the actual accept(2) call. In order to minimise risks, I have not made this the authoritative classid. For now it is only used as a backup when we execute with soft IRQs disabled. Once we're completely happy with its semantics we can use it as the sole classid. Footnote: I have rearranged the error path on cls_group module creation. If we didn't do this, then there is a window where someone could create a tc rule using cls_group before the cgroup subsystem has been registered. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/cls_cgroup.c50
1 files changed, 34 insertions, 16 deletions
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221180384fd7..78ef2c5e130b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -16,14 +16,11 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/cgroup.h> 18#include <linux/cgroup.h>
19#include <linux/rcupdate.h>
19#include <net/rtnetlink.h> 20#include <net/rtnetlink.h>
20#include <net/pkt_cls.h> 21#include <net/pkt_cls.h>
21 22#include <net/sock.h>
22struct cgroup_cls_state 23#include <net/cls_cgroup.h>
23{
24 struct cgroup_subsys_state css;
25 u32 classid;
26};
27 24
28static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, 25static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
29 struct cgroup *cgrp); 26 struct cgroup *cgrp);
@@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
112 struct cls_cgroup_head *head = tp->root; 109 struct cls_cgroup_head *head = tp->root;
113 u32 classid; 110 u32 classid;
114 111
112 rcu_read_lock();
113 classid = task_cls_state(current)->classid;
114 rcu_read_unlock();
115
115 /* 116 /*
116 * Due to the nature of the classifier it is required to ignore all 117 * Due to the nature of the classifier it is required to ignore all
117 * packets originating from softirq context as accessing `current' 118 * packets originating from softirq context as accessing `current'
@@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
122 * calls by looking at the number of nested bh disable calls because 123 * calls by looking at the number of nested bh disable calls because
123 * softirqs always disables bh. 124 * softirqs always disables bh.
124 */ 125 */
125 if (softirq_count() != SOFTIRQ_OFFSET) 126 if (softirq_count() != SOFTIRQ_OFFSET) {
126 return -1; 127 /* If there is an sk_classid we'll use that. */
127 128 if (!skb->sk)
128 rcu_read_lock(); 129 return -1;
129 classid = task_cls_state(current)->classid; 130 classid = skb->sk->sk_classid;
130 rcu_read_unlock(); 131 }
131 132
132 if (!classid) 133 if (!classid)
133 return -1; 134 return -1;
@@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
289 290
290static int __init init_cgroup_cls(void) 291static int __init init_cgroup_cls(void)
291{ 292{
292 int ret = register_tcf_proto_ops(&cls_cgroup_ops); 293 int ret;
293 if (ret) 294
294 return ret;
295 ret = cgroup_load_subsys(&net_cls_subsys); 295 ret = cgroup_load_subsys(&net_cls_subsys);
296 if (ret) 296 if (ret)
297 unregister_tcf_proto_ops(&cls_cgroup_ops); 297 goto out;
298
299#ifndef CONFIG_NET_CLS_CGROUP
300 /* We can't use rcu_assign_pointer because this is an int. */
301 smp_wmb();
302 net_cls_subsys_id = net_cls_subsys.subsys_id;
303#endif
304
305 ret = register_tcf_proto_ops(&cls_cgroup_ops);
306 if (ret)
307 cgroup_unload_subsys(&net_cls_subsys);
308
309out:
298 return ret; 310 return ret;
299} 311}
300 312
301static void __exit exit_cgroup_cls(void) 313static void __exit exit_cgroup_cls(void)
302{ 314{
303 unregister_tcf_proto_ops(&cls_cgroup_ops); 315 unregister_tcf_proto_ops(&cls_cgroup_ops);
316
317#ifndef CONFIG_NET_CLS_CGROUP
318 net_cls_subsys_id = -1;
319 synchronize_rcu();
320#endif
321
304 cgroup_unload_subsys(&net_cls_subsys); 322 cgroup_unload_subsys(&net_cls_subsys);
305} 323}
306 324