aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2010-05-24 03:12:34 -0400
committerDavid S. Miller <davem@davemloft.net>2010-05-24 03:12:34 -0400
commitf845172531fb7410c7fb7780b1a6e51ee6df7d52 (patch)
treeef1030d0ad9d9dbc8fe800a145c587f04be50ade /include
parenteda6e6f86b5f95b982ac7ebf7cf5be2a29a291e9 (diff)
cls_cgroup: Store classid in struct sock
Up until now cls_cgroup has relied on fetching the classid out of the currently executing thread. This runs into trouble when packet processing is delayed, in which case it may execute out of another thread's context. Furthermore, even when a packet is not delayed we may fail to classify it if soft IRQs have been disabled, because this scenario is indistinguishable from one where a packet unrelated to the current thread is processed by a real soft IRQ. In fact, the current semantics are inherently broken, as a single skb may be constructed out of the writes of two different tasks. A different manifestation of this problem is when the TCP stack transmits in response to an incoming ACK. This is currently unclassified. As we already have a concept of packet ownership for accounting purposes in the skb->sk pointer, this is a natural place to store the classid in a persistent manner. This patch adds the cls_cgroup classid in struct sock, filling up an existing hole on 64-bit :) The value is set at socket creation time. So all sockets created via socket(2) automatically gain the ID of the thread creating them. Whenever another process touches the socket by either reading or writing to it, we will change the socket classid to that of the process if it has a valid (non-zero) classid. For sockets created on inbound connections through accept(2), we inherit the classid of the original listening socket through sk_clone, possibly preceding the actual accept(2) call. In order to minimise risks, I have not made this the authoritative classid. For now it is only used as a backup when we execute with soft IRQs disabled. Once we're completely happy with its semantics we can use it as the sole classid. Footnote: I have rearranged the error path on cls_cgroup module creation. If we didn't do this, then there is a window where someone could create a tc rule using cls_cgroup before the cgroup subsystem has been registered. 
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/cls_cgroup.h63
-rw-r--r--include/net/sock.h10
2 files changed, 72 insertions, 1 deletions
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
new file mode 100644
index 000000000000..ef2df1475b51
--- /dev/null
+++ b/include/net/cls_cgroup.h
@@ -0,0 +1,63 @@
1/*
2 * cls_cgroup.h Control Group Classifier
3 *
4 * Authors: Thomas Graf <tgraf@suug.ch>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#ifndef _NET_CLS_CGROUP_H
14#define _NET_CLS_CGROUP_H
15
16#include <linux/cgroup.h>
17#include <linux/hardirq.h>
18#include <linux/rcupdate.h>
19
20#ifdef CONFIG_CGROUPS
21struct cgroup_cls_state
22{
23 struct cgroup_subsys_state css;
24 u32 classid;
25};
26
27#ifdef CONFIG_NET_CLS_CGROUP
28static inline u32 task_cls_classid(struct task_struct *p)
29{
30 if (in_interrupt())
31 return 0;
32
33 return container_of(task_subsys_state(p, net_cls_subsys_id),
34 struct cgroup_cls_state, css).classid;
35}
36#else
37extern int net_cls_subsys_id;
38
39static inline u32 task_cls_classid(struct task_struct *p)
40{
41 int id;
42 u32 classid;
43
44 if (in_interrupt())
45 return 0;
46
47 rcu_read_lock();
48 id = rcu_dereference(net_cls_subsys_id);
49 if (id >= 0)
50 classid = container_of(task_subsys_state(p, id),
51 struct cgroup_cls_state, css)->classid;
52 rcu_read_unlock();
53
54 return classid;
55}
56#endif
57#else
58static inline u32 task_cls_classid(struct task_struct *p)
59{
60 return 0;
61}
62#endif
63#endif /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5697caf8cc76..d24f382cb712 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -312,7 +312,7 @@ struct sock {
312 void *sk_security; 312 void *sk_security;
313#endif 313#endif
314 __u32 sk_mark; 314 __u32 sk_mark;
315 /* XXX 4 bytes hole on 64 bit */ 315 u32 sk_classid;
316 void (*sk_state_change)(struct sock *sk); 316 void (*sk_state_change)(struct sock *sk);
317 void (*sk_data_ready)(struct sock *sk, int bytes); 317 void (*sk_data_ready)(struct sock *sk, int bytes);
318 void (*sk_write_space)(struct sock *sk); 318 void (*sk_write_space)(struct sock *sk);
@@ -1074,6 +1074,14 @@ extern void *sock_kmalloc(struct sock *sk, int size,
1074extern void sock_kfree_s(struct sock *sk, void *mem, int size); 1074extern void sock_kfree_s(struct sock *sk, void *mem, int size);
1075extern void sk_send_sigurg(struct sock *sk); 1075extern void sk_send_sigurg(struct sock *sk);
1076 1076
1077#ifdef CONFIG_CGROUPS
1078extern void sock_update_classid(struct sock *sk);
1079#else
/*
 * Empty stand-in for sock_update_classid() when CONFIG_CGROUPS is not
 * defined, so callers need no #ifdef of their own.
 */
static inline void sock_update_classid(struct sock *sk)
{
}
1083#endif
1084
1077/* 1085/*
1078 * Functions to fill in entries in struct proto_ops when a protocol 1086 * Functions to fill in entries in struct proto_ops when a protocol
1079 * does not implement a particular function. 1087 * does not implement a particular function.