diff options
author | Neil Horman <nhorman@tuxdriver.com> | 2009-05-21 03:36:08 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-05-21 19:50:21 -0400 |
commit | 4ea7e38696c7e798c47ebbecadfd392f23f814f9 (patch) | |
tree | 1e144d0c79fb667860ff7fa0ed203984e888ddab | |
parent | d95ed9275edcb8995bda31005bb3f55e087626d7 (diff) |
dropmon: add ability to detect when hardware drops rx packets
Patch to add the ability to detect drops in hardware interfaces via dropwatch.
Adds a tracepoint to net_rx_action to signal every time a napi instance is
polled. The dropmon code then periodically checks to see if the rx_frames
counter has changed, and if so, adds a drop notification to the netlink
protocol, using the reserved all-0's vector to indicate the drop location was in
hardware, rather than somewhere in the code.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
include/linux/net_dropmon.h | 8 ++
include/trace/napi.h | 11 +++
net/core/dev.c | 5 +
net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++--
net/core/net-traces.c | 4 +
net/core/netpoll.c | 2
6 files changed, 149 insertions(+), 5 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/net_dropmon.h | 8 | ||||
-rw-r--r-- | include/trace/napi.h | 11 | ||||
-rw-r--r-- | net/core/dev.c | 5 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 124 | ||||
-rw-r--r-- | net/core/net-traces.c | 4 | ||||
-rw-r--r-- | net/core/netpoll.c | 2 |
6 files changed, 149 insertions, 5 deletions
diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a630..e8a8b5c50ed0 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h | |||
@@ -2,12 +2,20 @@ | |||
2 | #define __NET_DROPMON_H | 2 | #define __NET_DROPMON_H |
3 | 3 | ||
4 | #include <linux/netlink.h> | 4 | #include <linux/netlink.h> |
5 | #include <linux/types.h> | ||
5 | 6 | ||
6 | struct net_dm_drop_point { | 7 | struct net_dm_drop_point { |
7 | __u8 pc[8]; | 8 | __u8 pc[8]; |
8 | __u32 count; | 9 | __u32 count; |
9 | }; | 10 | }; |
10 | 11 | ||
12 | #define is_drop_point_hw(x) do {\ | ||
13 | int ____i, ____j;\ | ||
14 | for (____i = 0; ____i < 8; i ____i++)\ | ||
15 | ____j |= x[____i];\ | ||
16 | ____j;\ | ||
17 | } while (0) | ||
18 | |||
11 | #define NET_DM_CFG_VERSION 0 | 19 | #define NET_DM_CFG_VERSION 0 |
12 | #define NET_DM_CFG_ALERT_COUNT 1 | 20 | #define NET_DM_CFG_ALERT_COUNT 1 |
13 | #define NET_DM_CFG_ALERT_DELAY 2 | 21 | #define NET_DM_CFG_ALERT_DELAY 2 |
diff --git a/include/trace/napi.h b/include/trace/napi.h new file mode 100644 index 000000000000..a8989c4547e7 --- /dev/null +++ b/include/trace/napi.h | |||
@@ -0,0 +1,11 @@ | |||
1 | #ifndef _TRACE_NAPI_H_ | ||
2 | #define _TRACE_NAPI_H_ | ||
3 | |||
4 | #include <linux/netdevice.h> | ||
5 | #include <linux/tracepoint.h> | ||
6 | |||
7 | DECLARE_TRACE(napi_poll, | ||
8 | TP_PROTO(struct napi_struct *napi), | ||
9 | TP_ARGS(napi)); | ||
10 | |||
11 | #endif | ||
diff --git a/net/core/dev.c b/net/core/dev.c index 92ebeca29901..3942266d1f6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -126,6 +126,7 @@ | |||
126 | #include <linux/in.h> | 126 | #include <linux/in.h> |
127 | #include <linux/jhash.h> | 127 | #include <linux/jhash.h> |
128 | #include <linux/random.h> | 128 | #include <linux/random.h> |
129 | #include <trace/napi.h> | ||
129 | 130 | ||
130 | #include "net-sysfs.h" | 131 | #include "net-sysfs.h" |
131 | 132 | ||
@@ -2771,8 +2772,10 @@ static void net_rx_action(struct softirq_action *h) | |||
2771 | * accidently calling ->poll() when NAPI is not scheduled. | 2772 | * accidently calling ->poll() when NAPI is not scheduled. |
2772 | */ | 2773 | */ |
2773 | work = 0; | 2774 | work = 0; |
2774 | if (test_bit(NAPI_STATE_SCHED, &n->state)) | 2775 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
2775 | work = n->poll(n, weight); | 2776 | work = n->poll(n, weight); |
2777 | trace_napi_poll(n); | ||
2778 | } | ||
2776 | 2779 | ||
2777 | WARN_ON_ONCE(work > weight); | 2780 | WARN_ON_ONCE(work > weight); |
2778 | 2781 | ||
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2797b711a978..a6c2ac2828fb 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c | |||
@@ -22,8 +22,10 @@ | |||
22 | #include <linux/timer.h> | 22 | #include <linux/timer.h> |
23 | #include <linux/bitops.h> | 23 | #include <linux/bitops.h> |
24 | #include <net/genetlink.h> | 24 | #include <net/genetlink.h> |
25 | #include <net/netevent.h> | ||
25 | 26 | ||
26 | #include <trace/skb.h> | 27 | #include <trace/skb.h> |
28 | #include <trace/napi.h> | ||
27 | 29 | ||
28 | #include <asm/unaligned.h> | 30 | #include <asm/unaligned.h> |
29 | 31 | ||
@@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused); | |||
38 | * and the work handle that will send up | 40 | * and the work handle that will send up |
39 | * netlink alerts | 41 | * netlink alerts |
40 | */ | 42 | */ |
41 | struct sock *dm_sock; | 43 | static int trace_state = TRACE_OFF; |
44 | static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; | ||
42 | 45 | ||
43 | struct per_cpu_dm_data { | 46 | struct per_cpu_dm_data { |
44 | struct work_struct dm_alert_work; | 47 | struct work_struct dm_alert_work; |
@@ -47,6 +50,13 @@ struct per_cpu_dm_data { | |||
47 | struct timer_list send_timer; | 50 | struct timer_list send_timer; |
48 | }; | 51 | }; |
49 | 52 | ||
53 | struct dm_hw_stat_delta { | ||
54 | struct net_device *dev; | ||
55 | struct list_head list; | ||
56 | struct rcu_head rcu; | ||
57 | unsigned long last_drop_val; | ||
58 | }; | ||
59 | |||
50 | static struct genl_family net_drop_monitor_family = { | 60 | static struct genl_family net_drop_monitor_family = { |
51 | .id = GENL_ID_GENERATE, | 61 | .id = GENL_ID_GENERATE, |
52 | .hdrsize = 0, | 62 | .hdrsize = 0, |
@@ -59,7 +69,8 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); | |||
59 | 69 | ||
60 | static int dm_hit_limit = 64; | 70 | static int dm_hit_limit = 64; |
61 | static int dm_delay = 1; | 71 | static int dm_delay = 1; |
62 | 72 | static unsigned long dm_hw_check_delta = 2*HZ; | |
73 | static LIST_HEAD(hw_stats_list); | ||
63 | 74 | ||
64 | static void reset_per_cpu_data(struct per_cpu_dm_data *data) | 75 | static void reset_per_cpu_data(struct per_cpu_dm_data *data) |
65 | { | 76 | { |
@@ -115,7 +126,7 @@ static void sched_send_work(unsigned long unused) | |||
115 | schedule_work(&data->dm_alert_work); | 126 | schedule_work(&data->dm_alert_work); |
116 | } | 127 | } |
117 | 128 | ||
118 | static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) | 129 | static void trace_drop_common(struct sk_buff *skb, void *location) |
119 | { | 130 | { |
120 | struct net_dm_alert_msg *msg; | 131 | struct net_dm_alert_msg *msg; |
121 | struct nlmsghdr *nlh; | 132 | struct nlmsghdr *nlh; |
@@ -159,24 +170,80 @@ out: | |||
159 | return; | 170 | return; |
160 | } | 171 | } |
161 | 172 | ||
173 | static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) | ||
174 | { | ||
175 | trace_drop_common(skb, location); | ||
176 | } | ||
177 | |||
178 | static void trace_napi_poll_hit(struct napi_struct *napi) | ||
179 | { | ||
180 | struct dm_hw_stat_delta *new_stat; | ||
181 | |||
182 | /* | ||
183 | * Ratelimit our check time to dm_hw_check_delta jiffies | ||
184 | */ | ||
185 | if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) | ||
186 | return; | ||
187 | |||
188 | rcu_read_lock(); | ||
189 | list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { | ||
190 | if ((new_stat->dev == napi->dev) && | ||
191 | (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { | ||
192 | trace_drop_common(NULL, NULL); | ||
193 | new_stat->last_drop_val = napi->dev->stats.rx_dropped; | ||
194 | break; | ||
195 | } | ||
196 | } | ||
197 | rcu_read_unlock(); | ||
198 | } | ||
199 | |||
200 | |||
201 | static void free_dm_hw_stat(struct rcu_head *head) | ||
202 | { | ||
203 | struct dm_hw_stat_delta *n; | ||
204 | n = container_of(head, struct dm_hw_stat_delta, rcu); | ||
205 | kfree(n); | ||
206 | } | ||
207 | |||
162 | static int set_all_monitor_traces(int state) | 208 | static int set_all_monitor_traces(int state) |
163 | { | 209 | { |
164 | int rc = 0; | 210 | int rc = 0; |
211 | struct dm_hw_stat_delta *new_stat = NULL; | ||
212 | struct dm_hw_stat_delta *temp; | ||
213 | |||
214 | spin_lock(&trace_state_lock); | ||
165 | 215 | ||
166 | switch (state) { | 216 | switch (state) { |
167 | case TRACE_ON: | 217 | case TRACE_ON: |
168 | rc |= register_trace_kfree_skb(trace_kfree_skb_hit); | 218 | rc |= register_trace_kfree_skb(trace_kfree_skb_hit); |
219 | rc |= register_trace_napi_poll(trace_napi_poll_hit); | ||
169 | break; | 220 | break; |
170 | case TRACE_OFF: | 221 | case TRACE_OFF: |
171 | rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); | 222 | rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); |
223 | rc |= unregister_trace_napi_poll(trace_napi_poll_hit); | ||
172 | 224 | ||
173 | tracepoint_synchronize_unregister(); | 225 | tracepoint_synchronize_unregister(); |
226 | |||
227 | /* | ||
228 | * Clean the device list | ||
229 | */ | ||
230 | list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { | ||
231 | if (new_stat->dev == NULL) { | ||
232 | list_del_rcu(&new_stat->list); | ||
233 | call_rcu(&new_stat->rcu, free_dm_hw_stat); | ||
234 | } | ||
235 | } | ||
174 | break; | 236 | break; |
175 | default: | 237 | default: |
176 | rc = 1; | 238 | rc = 1; |
177 | break; | 239 | break; |
178 | } | 240 | } |
179 | 241 | ||
242 | if (!rc) | ||
243 | trace_state = state; | ||
244 | |||
245 | spin_unlock(&trace_state_lock); | ||
246 | |||
180 | if (rc) | 247 | if (rc) |
181 | return -EINPROGRESS; | 248 | return -EINPROGRESS; |
182 | return rc; | 249 | return rc; |
@@ -204,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb, | |||
204 | return -ENOTSUPP; | 271 | return -ENOTSUPP; |
205 | } | 272 | } |
206 | 273 | ||
274 | static int dropmon_net_event(struct notifier_block *ev_block, | ||
275 | unsigned long event, void *ptr) | ||
276 | { | ||
277 | struct net_device *dev = ptr; | ||
278 | struct dm_hw_stat_delta *new_stat = NULL; | ||
279 | struct dm_hw_stat_delta *tmp; | ||
280 | |||
281 | switch (event) { | ||
282 | case NETDEV_REGISTER: | ||
283 | new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); | ||
284 | |||
285 | if (!new_stat) | ||
286 | goto out; | ||
287 | |||
288 | new_stat->dev = dev; | ||
289 | INIT_RCU_HEAD(&new_stat->rcu); | ||
290 | spin_lock(&trace_state_lock); | ||
291 | list_add_rcu(&new_stat->list, &hw_stats_list); | ||
292 | spin_unlock(&trace_state_lock); | ||
293 | break; | ||
294 | case NETDEV_UNREGISTER: | ||
295 | spin_lock(&trace_state_lock); | ||
296 | list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { | ||
297 | if (new_stat->dev == dev) { | ||
298 | new_stat->dev = NULL; | ||
299 | if (trace_state == TRACE_OFF) { | ||
300 | list_del_rcu(&new_stat->list); | ||
301 | call_rcu(&new_stat->rcu, free_dm_hw_stat); | ||
302 | break; | ||
303 | } | ||
304 | } | ||
305 | } | ||
306 | spin_unlock(&trace_state_lock); | ||
307 | break; | ||
308 | } | ||
309 | out: | ||
310 | return NOTIFY_DONE; | ||
311 | } | ||
207 | 312 | ||
208 | static struct genl_ops dropmon_ops[] = { | 313 | static struct genl_ops dropmon_ops[] = { |
209 | { | 314 | { |
@@ -220,6 +325,10 @@ static struct genl_ops dropmon_ops[] = { | |||
220 | }, | 325 | }, |
221 | }; | 326 | }; |
222 | 327 | ||
328 | static struct notifier_block dropmon_net_notifier = { | ||
329 | .notifier_call = dropmon_net_event | ||
330 | }; | ||
331 | |||
223 | static int __init init_net_drop_monitor(void) | 332 | static int __init init_net_drop_monitor(void) |
224 | { | 333 | { |
225 | int cpu; | 334 | int cpu; |
@@ -243,12 +352,18 @@ static int __init init_net_drop_monitor(void) | |||
243 | ret = genl_register_ops(&net_drop_monitor_family, | 352 | ret = genl_register_ops(&net_drop_monitor_family, |
244 | &dropmon_ops[i]); | 353 | &dropmon_ops[i]); |
245 | if (ret) { | 354 | if (ret) { |
246 | printk(KERN_CRIT "failed to register operation %d\n", | 355 | printk(KERN_CRIT "Failed to register operation %d\n", |
247 | dropmon_ops[i].cmd); | 356 | dropmon_ops[i].cmd); |
248 | goto out_unreg; | 357 | goto out_unreg; |
249 | } | 358 | } |
250 | } | 359 | } |
251 | 360 | ||
361 | rc = register_netdevice_notifier(&dropmon_net_notifier); | ||
362 | if (rc < 0) { | ||
363 | printk(KERN_CRIT "Failed to register netdevice notifier\n"); | ||
364 | goto out_unreg; | ||
365 | } | ||
366 | |||
252 | rc = 0; | 367 | rc = 0; |
253 | 368 | ||
254 | for_each_present_cpu(cpu) { | 369 | for_each_present_cpu(cpu) { |
@@ -259,6 +374,7 @@ static int __init init_net_drop_monitor(void) | |||
259 | data->send_timer.data = cpu; | 374 | data->send_timer.data = cpu; |
260 | data->send_timer.function = sched_send_work; | 375 | data->send_timer.function = sched_send_work; |
261 | } | 376 | } |
377 | |||
262 | goto out; | 378 | goto out; |
263 | 379 | ||
264 | out_unreg: | 380 | out_unreg: |
diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb45665e4f..b07b25bd2cde 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/netlink.h> | 20 | #include <linux/netlink.h> |
21 | #include <linux/net_dropmon.h> | 21 | #include <linux/net_dropmon.h> |
22 | #include <trace/skb.h> | 22 | #include <trace/skb.h> |
23 | #include <trace/napi.h> | ||
23 | 24 | ||
24 | #include <asm/unaligned.h> | 25 | #include <asm/unaligned.h> |
25 | #include <asm/bitops.h> | 26 | #include <asm/bitops.h> |
@@ -27,3 +28,6 @@ | |||
27 | 28 | ||
28 | DEFINE_TRACE(kfree_skb); | 29 | DEFINE_TRACE(kfree_skb); |
29 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | 30 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); |
31 | |||
32 | DEFINE_TRACE(napi_poll); | ||
33 | EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); | ||
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 64f51eec6576..00b14e2c50ed 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <net/tcp.h> | 24 | #include <net/tcp.h> |
25 | #include <net/udp.h> | 25 | #include <net/udp.h> |
26 | #include <asm/unaligned.h> | 26 | #include <asm/unaligned.h> |
27 | #include <trace/napi.h> | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * We maintain a small pool of fully-sized skbs, to make sure the | 30 | * We maintain a small pool of fully-sized skbs, to make sure the |
@@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo, | |||
137 | set_bit(NAPI_STATE_NPSVC, &napi->state); | 138 | set_bit(NAPI_STATE_NPSVC, &napi->state); |
138 | 139 | ||
139 | work = napi->poll(napi, budget); | 140 | work = napi->poll(napi, budget); |
141 | trace_napi_poll(napi->dev); | ||
140 | 142 | ||
141 | clear_bit(NAPI_STATE_NPSVC, &napi->state); | 143 | clear_bit(NAPI_STATE_NPSVC, &napi->state); |
142 | atomic_dec(&trapped); | 144 | atomic_dec(&trapped); |