author | Neil Horman <nhorman@tuxdriver.com> | 2012-04-27 06:11:49 -0400
committer | David S. Miller <davem@davemloft.net> | 2012-04-28 02:18:48 -0400
commit | 3885ca785a3618593226687ced84f3f336dc3860 (patch)
tree | a6b5af980c1295aaeee69749d4d79d47e072321a /net
parent | cde2e9a651b76d8db36ae94cd0febc82b637e5dd (diff)
drop_monitor: Make updating data->skb smp safe
Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in
its SMP protections. Specifically, it's possible to replace data->skb while it
is being written. This patch corrects that by making data->skb an RCU-protected
variable, which prevents it from being overwritten while a tracepoint is
modifying it.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
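
For readers less familiar with RCU, the sketch below shows the publish/read pattern the patch applies to data->skb: the updater installs the new pointer with rcu_assign_pointer() and waits out old readers with synchronize_rcu(), while readers only touch the pointer between rcu_read_lock() and rcu_read_unlock() via rcu_dereference(). The structure and function names here are illustrative stand-ins, not the drop_monitor code itself.

/*
 * Minimal sketch of the RCU publish/read pattern used for data->skb.
 * struct dm_data, publish_new_buf and read_buf are made-up names for
 * illustration only.
 */
#include <linux/rcupdate.h>
#include <linux/skbuff.h>

struct dm_data {
        struct sk_buff __rcu *skb;      /* written by one updater, read from tracepoints */
};

/* Updater side: publish the new buffer, then wait for old readers to drain. */
static void publish_new_buf(struct dm_data *data, struct sk_buff *new_skb)
{
        rcu_assign_pointer(data->skb, new_skb); /* pairs with rcu_dereference() */
        synchronize_rcu();                      /* after this, no reader still sees the old skb */
}

/* Reader side (e.g. a tracepoint): dereference only inside an RCU read section. */
static void read_buf(struct dm_data *data)
{
        struct sk_buff *skb;

        rcu_read_lock();
        skb = rcu_dereference(data->skb);
        if (skb) {
                /* skb is safe to use here; it cannot be replaced and freed under us */
        }
        rcu_read_unlock();
}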
Diffstat (limited to 'net')
-rw-r--r-- | net/core/drop_monitor.c | 70
1 file changed, 54 insertions, 16 deletions
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index a221a5bbecf7..7592943513e3 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -46,7 +46,7 @@ static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
         struct work_struct dm_alert_work;
-        struct sk_buff *skb;
+        struct sk_buff __rcu *skb;
         atomic_t dm_hit_count;
         struct timer_list send_timer;
 };
@@ -73,35 +73,58 @@ static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
+static int initialized = 0;
 
 static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
         size_t al;
         struct net_dm_alert_msg *msg;
         struct nlattr *nla;
+        struct sk_buff *skb;
+        struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
 
         al = sizeof(struct net_dm_alert_msg);
         al += dm_hit_limit * sizeof(struct net_dm_drop_point);
         al += sizeof(struct nlattr);
 
-        data->skb = genlmsg_new(al, GFP_KERNEL);
-        genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
-                        0, NET_DM_CMD_ALERT);
-        nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
-        msg = nla_data(nla);
-        memset(msg, 0, al);
-        atomic_set(&data->dm_hit_count, dm_hit_limit);
+        skb = genlmsg_new(al, GFP_KERNEL);
+
+        if (skb) {
+                genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+                                0, NET_DM_CMD_ALERT);
+                nla = nla_reserve(skb, NLA_UNSPEC,
+                                  sizeof(struct net_dm_alert_msg));
+                msg = nla_data(nla);
+                memset(msg, 0, al);
+        } else if (initialized)
+                schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+
+        /*
+         * Don't need to lock this, since we are guaranteed to only
+         * run this on a single cpu at a time.
+         * Note also that we only update data->skb if the old and new skb
+         * pointers don't match. This ensures that we don't continually call
+         * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
+         */
+        if (skb != oskb) {
+                rcu_assign_pointer(data->skb, skb);
+
+                synchronize_rcu();
+
+                atomic_set(&data->dm_hit_count, dm_hit_limit);
+        }
+
 }
 
 static void send_dm_alert(struct work_struct *unused)
 {
         struct sk_buff *skb;
-        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+        struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
 
         /*
          * Grab the skb we're about to send
          */
-        skb = data->skb;
+        skb = rcu_dereference_protected(data->skb, 1);
 
         /*
          * Replace it with a new one
@@ -111,8 +134,10 @@ static void send_dm_alert(struct work_struct *unused)
         /*
          * Ship it!
          */
-        genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+        if (skb)
+                genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
 
+        put_cpu_var(dm_cpu_data);
 }
 
 /*
@@ -123,9 +148,11 @@ static void send_dm_alert(struct work_struct *unused)
  */
 static void sched_send_work(unsigned long unused)
 {
-        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+        struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+
+        schedule_work_on(smp_processor_id(), &data->dm_alert_work);
 
-        schedule_work(&data->dm_alert_work);
+        put_cpu_var(dm_cpu_data);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -134,9 +161,16 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
         struct nlmsghdr *nlh;
         struct nlattr *nla;
         int i;
-        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+        struct sk_buff *dskb;
+        struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
 
 
+        rcu_read_lock();
+        dskb = rcu_dereference(data->skb);
+
+        if (!dskb)
+                goto out;
+
         if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
                 /*
                  * we're already at zero, discard this hit
@@ -144,7 +178,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
                 goto out;
         }
 
-        nlh = (struct nlmsghdr *)data->skb->data;
+        nlh = (struct nlmsghdr *)dskb->data;
         nla = genlmsg_data(nlmsg_data(nlh));
         msg = nla_data(nla);
         for (i = 0; i < msg->entries; i++) {
@@ -158,7 +192,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
         /*
          * We need to create a new entry
          */
-        __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+        __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
         nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
         memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
         msg->points[msg->entries].count = 1;
@@ -170,6 +204,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
         }
 
 out:
+        rcu_read_unlock();
+        put_cpu_var(dm_cpu_data);
         return;
 }
 
@@ -375,6 +411,8 @@ static int __init init_net_drop_monitor(void)
                 data->send_timer.function = sched_send_work;
         }
 
+        initialized = 1;
+
         goto out;
 
 out_unreg:
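
Besides the RCU conversion, the diff also trades __get_cpu_var() for the get_cpu_var()/put_cpu_var() pair, which disables preemption while the per-CPU data is in use so the task cannot migrate to another CPU mid-access. A minimal sketch of that pairing, using a hypothetical per-CPU counter rather than the drop_monitor structures:

#include <linux/percpu.h>

/* Hypothetical per-CPU variable, not part of drop_monitor. */
static DEFINE_PER_CPU(int, example_counter);

static void bump_example_counter(void)
{
        int *cnt = &get_cpu_var(example_counter);       /* disables preemption */

        (*cnt)++;       /* safe: we stay on this CPU until put_cpu_var() */

        put_cpu_var(example_counter);                   /* re-enables preemption */
}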