author     Eric Dumazet <edumazet@google.com>        2012-06-03 20:18:19 -0400
committer  David S. Miller <davem@davemloft.net>     2012-06-04 11:42:01 -0400
commit     bec4596b4e6770c7037f21f6bd27567b152dc0d6 (patch)
tree       efd668e2e2868d0059e1e02de2c0849ba65b9f62 /net
parent     f8f5701bdaf9134b1f90e5044a82c66324d2073f (diff)
drop_monitor: dont sleep in atomic context
drop_monitor calls several sleeping functions while in atomic context.
BUG: sleeping function called from invalid context at mm/slub.c:943
in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2
Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55
Call Trace:
[<ffffffff810697ca>] __might_sleep+0xca/0xf0
[<ffffffff811345a3>] kmem_cache_alloc_node+0x1b3/0x1c0
[<ffffffff8105578c>] ? queue_delayed_work_on+0x11c/0x130
[<ffffffff815343fb>] __alloc_skb+0x4b/0x230
[<ffffffffa00b0360>] ? reset_per_cpu_data+0x160/0x160 [drop_monitor]
[<ffffffffa00b022f>] reset_per_cpu_data+0x2f/0x160 [drop_monitor]
[<ffffffffa00b03ab>] send_dm_alert+0x4b/0xb0 [drop_monitor]
[<ffffffff810568e0>] process_one_work+0x130/0x4c0
[<ffffffff81058249>] worker_thread+0x159/0x360
[<ffffffff810580f0>] ? manage_workers.isra.27+0x240/0x240
[<ffffffff8105d403>] kthread+0x93/0xa0
[<ffffffff816be6d4>] kernel_thread_helper+0x4/0x10
[<ffffffff8105d370>] ? kthread_freezable_should_stop+0x80/0x80
[<ffffffff816be6d0>] ? gs_change+0xb/0xb
Rework the logic to call the sleeping functions in the right context.
Use the standard timer/workqueue API to let the system choose any cpu to
perform the allocation and the netlink send.
Also avoid a loop if reset_per_cpu_data() cannot allocate memory:
use mod_timer() to wait 1/10 second before the next try.
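
For illustration, here is a minimal, self-contained sketch of the general pattern this rework uses (it is not the drop_monitor code itself; the names deferred_ctx, do_sleeping_part and timer_cb are made up, and it uses the pre-4.15 timer API, as the patch does): the timer callback runs in atomic context and only schedules a work item, the work handler runs in process context where sleeping is allowed, and an allocation failure is retried by re-arming the timer instead of looping.

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/jiffies.h>

struct deferred_ctx {
	struct timer_list timer;	/* callback runs in atomic (softirq) context */
	struct work_struct work;	/* handler runs in process context */
};

static struct deferred_ctx ctx;

static void do_sleeping_part(struct work_struct *work)
{
	struct deferred_ctx *c = container_of(work, struct deferred_ctx, work);
	void *buf;

	/* Workqueue context: sleeping allocations are fine here. */
	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf) {
		/* Allocation failed: retry in 1/10 second, no busy loop. */
		mod_timer(&c->timer, jiffies + HZ / 10);
		return;
	}
	/* ... build and send the message here ... */
	kfree(buf);
}

static void timer_cb(unsigned long data)
{
	struct deferred_ctx *c = (struct deferred_ctx *)data;

	/* Atomic context: do not allocate or sleep, just defer the work. */
	schedule_work(&c->work);
}

static int __init deferred_init(void)
{
	INIT_WORK(&ctx.work, do_sleeping_part);
	init_timer(&ctx.timer);
	ctx.timer.function = timer_cb;
	ctx.timer.data = (unsigned long)&ctx;
	mod_timer(&ctx.timer, jiffies + HZ);
	return 0;
}

static void __exit deferred_exit(void)
{
	del_timer_sync(&ctx.timer);
	cancel_work_sync(&ctx.work);
}

module_init(deferred_init);
module_exit(deferred_exit);
MODULE_LICENSE("GPL");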
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--   net/core/drop_monitor.c   102
1 file changed, 33 insertions, 69 deletions
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ea5fb9fcc3f5..d23b6682f4e9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -36,9 +36,6 @@
 #define TRACE_ON 1
 #define TRACE_OFF 0
 
-static void send_dm_alert(struct work_struct *unused);
-
-
 /*
  * Globals, our netlink socket pointer
  * and the work handle that will send up
@@ -48,11 +45,10 @@ static int trace_state = TRACE_OFF;
 static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
-	struct work_struct dm_alert_work;
-	struct sk_buff __rcu *skb;
-	atomic_t dm_hit_count;
-	struct timer_list send_timer;
-	int cpu;
+	spinlock_t		lock;
+	struct sk_buff		*skb;
+	struct work_struct	dm_alert_work;
+	struct timer_list	send_timer;
 };
 
 struct dm_hw_stat_delta {
@@ -78,13 +74,13 @@ static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
-static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
 	struct nlattr *nla;
 	struct sk_buff *skb;
-	struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
+	unsigned long flags;
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -99,65 +95,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 				  sizeof(struct net_dm_alert_msg));
 		msg = nla_data(nla);
 		memset(msg, 0, al);
-	} else
-		schedule_work_on(data->cpu, &data->dm_alert_work);
-
-	/*
-	 * Don't need to lock this, since we are guaranteed to only
-	 * run this on a single cpu at a time.
-	 * Note also that we only update data->skb if the old and new skb
-	 * pointers don't match.  This ensures that we don't continually call
-	 * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
-	 */
-	if (skb != oskb) {
-		rcu_assign_pointer(data->skb, skb);
-
-		synchronize_rcu();
-
-		atomic_set(&data->dm_hit_count, dm_hit_limit);
+	} else {
+		mod_timer(&data->send_timer, jiffies + HZ / 10);
 	}
 
+	spin_lock_irqsave(&data->lock, flags);
+	swap(data->skb, skb);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return skb;
 }
 
-static void send_dm_alert(struct work_struct *unused)
+static void send_dm_alert(struct work_struct *work)
 {
 	struct sk_buff *skb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+	struct per_cpu_dm_data *data;
 
-	WARN_ON_ONCE(data->cpu != smp_processor_id());
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
 
-	/*
-	 * Grab the skb we're about to send
-	 */
-	skb = rcu_dereference_protected(data->skb, 1);
-
-	/*
-	 * Replace it with a new one
-	 */
-	reset_per_cpu_data(data);
+	skb = reset_per_cpu_data(data);
 
-	/*
-	 * Ship it!
-	 */
 	if (skb)
 		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
-
-	put_cpu_var(dm_cpu_data);
 }
 
 /*
  * This is the timer function to delay the sending of an alert
  * in the event that more drops will arrive during the
- * hysteresis period.  Note that it operates under the timer interrupt
- * so we don't need to disable preemption here
+ * hysteresis period.
  */
-static void sched_send_work(unsigned long unused)
+static void sched_send_work(unsigned long _data)
 {
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
-	schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+	struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
 
-	put_cpu_var(dm_cpu_data);
+	schedule_work(&data->dm_alert_work);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -167,33 +138,28 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	struct nlattr *nla;
 	int i;
 	struct sk_buff *dskb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
+	struct per_cpu_dm_data *data;
+	unsigned long flags;
 
-	rcu_read_lock();
-	dskb = rcu_dereference(data->skb);
+	local_irq_save(flags);
+	data = &__get_cpu_var(dm_cpu_data);
+	spin_lock(&data->lock);
+	dskb = data->skb;
 
 	if (!dskb)
 		goto out;
 
-	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
-		/*
-		 * we're already at zero, discard this hit
-		 */
-		goto out;
-	}
-
 	nlh = (struct nlmsghdr *)dskb->data;
 	nla = genlmsg_data(nlmsg_data(nlh));
 	msg = nla_data(nla);
 	for (i = 0; i < msg->entries; i++) {
 		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 			msg->points[i].count++;
-			atomic_inc(&data->dm_hit_count);
 			goto out;
 		}
 	}
-
+	if (msg->entries == dm_hit_limit)
+		goto out;
 	/*
 	 * We need to create a new entry
 	 */
@@ -205,13 +171,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 
 	if (!timer_pending(&data->send_timer)) {
 		data->send_timer.expires = jiffies + dm_delay * HZ;
-		add_timer_on(&data->send_timer, smp_processor_id());
+		add_timer(&data->send_timer);
 	}
 
 out:
-	rcu_read_unlock();
-	put_cpu_var(dm_cpu_data);
-	return;
+	spin_unlock_irqrestore(&data->lock, flags);
 }
 
 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
@@ -418,11 +382,11 @@ static int __init init_net_drop_monitor(void)
 
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		data->cpu = cpu;
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
 		init_timer(&data->send_timer);
-		data->send_timer.data = cpu;
+		data->send_timer.data = (unsigned long)data;
 		data->send_timer.function = sched_send_work;
+		spin_lock_init(&data->lock);
 		reset_per_cpu_data(data);
 	}
 