Diffstat (limited to 'net/core/drop_monitor.c')
 net/core/drop_monitor.c | 724 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 701 insertions(+), 23 deletions(-)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 39e094907391..bfc024024aa3 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,6 +26,7 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <net/drop_monitor.h>
 #include <net/genetlink.h>
 #include <net/netevent.h>
 
@@ -43,6 +44,7 @@
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
+static bool monitor_hw;
 
 /* net_dm_mutex
  *
@@ -56,9 +58,26 @@ struct net_dm_stats {
 	struct u64_stats_sync syncp;
 };
 
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+	char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+	u32 count;
+};
+
+struct net_dm_hw_entries {
+	u32 num_entries;
+	struct net_dm_hw_entry entries[0];
+};
+
 struct per_cpu_dm_data {
-	spinlock_t lock; /* Protects 'skb' and 'send_timer' */
-	struct sk_buff *skb;
+	spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+			  * 'send_timer'
+			  */
+	union {
+		struct sk_buff *skb;
+		struct net_dm_hw_entries *hw_entries;
+	};
 	struct sk_buff_head drop_queue;
 	struct work_struct dm_alert_work;
 	struct timer_list send_timer;
@@ -76,6 +95,7 @@ struct dm_hw_stat_delta {
 static struct genl_family net_drop_monitor_family;
 
 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
 
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
@@ -92,10 +112,16 @@ struct net_dm_alert_ops {
 	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
 				int work, int budget);
 	void (*work_item_func)(struct work_struct *work);
+	void (*hw_work_item_func)(struct work_struct *work);
+	void (*hw_probe)(struct sk_buff *skb,
+			 const struct net_dm_hw_metadata *hw_metadata);
 };
 
 struct net_dm_skb_cb {
-	void *pc;
+	union {
+		struct net_dm_hw_metadata *hw_metadata;
+		void *pc;
+	};
 };
 
 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -266,10 +292,190 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
+{
+	struct net_dm_hw_entries *hw_entries;
+	unsigned long flags;
+
+	hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+			     GFP_KERNEL);
+	if (!hw_entries) {
+		/* If the memory allocation failed, we try to perform another
+		 * allocation in 1/10 second. Otherwise, the probe function
+		 * will constantly bail out.
+		 */
+		mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+	}
+
+	spin_lock_irqsave(&hw_data->lock, flags);
+	swap(hw_data->hw_entries, hw_entries);
+	spin_unlock_irqrestore(&hw_data->lock, flags);
+
+	return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+			       const struct net_dm_hw_entry *hw_entry)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+				 const struct net_dm_hw_entries *hw_entries)
+{
+	struct nlattr *attr;
+	int i;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+	if (!attr)
+		return -EMSGSIZE;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		int rc;
+
+		rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+			      const struct net_dm_hw_entries *hw_entries)
+{
+	struct net_dm_alert_msg anc_hdr = { 0 };
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	/* We need to put the ancillary header in order not to break user
+	 * space.
+	 */
+	if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+		goto nla_put_failure;
+
+	rc = net_dm_hw_entries_put(msg, hw_entries);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+	struct net_dm_hw_entries *hw_entries;
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *msg;
+	int rc;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+	if (!hw_entries)
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	kfree(hw_entries);
+}
+
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+			const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_entries *hw_entries;
+	struct net_dm_hw_entry *hw_entry;
+	struct per_cpu_dm_data *hw_data;
+	unsigned long flags;
+	int i;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+	spin_lock_irqsave(&hw_data->lock, flags);
+	hw_entries = hw_data->hw_entries;
+
+	if (!hw_entries)
+		goto out;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		hw_entry = &hw_entries->entries[i];
+		if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+			     NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+			hw_entry->count++;
+			goto out;
+		}
+	}
+	if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+		goto out;
+
+	hw_entry = &hw_entries->entries[hw_entries->num_entries];
+	strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+		NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+	hw_entry->count = 1;
+	hw_entries->num_entries++;
+
+	if (!timer_pending(&hw_data->send_timer)) {
+		hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+		add_timer(&hw_data->send_timer);
+	}
+
+out:
+	spin_unlock_irqrestore(&hw_data->lock, flags);
+}
+
 static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
 	.kfree_skb_probe = trace_kfree_skb_hit,
 	.napi_poll_probe = trace_napi_poll_hit,
 	.work_item_func = send_dm_alert,
+	.hw_work_item_func = net_dm_hw_summary_work,
+	.hw_probe = net_dm_hw_summary_probe,
 };
 
 static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
@@ -323,7 +529,9 @@ static size_t net_dm_in_port_size(void)
 	/* NET_DM_ATTR_IN_PORT nest */
 	return nla_total_size(0) +
 	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
-	       nla_total_size(sizeof(u32));
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PORT_NETDEV_NAME */
+	       nla_total_size(IFNAMSIZ + 1);
 }
 
 #define NET_DM_MAX_SYMBOL_LEN 40
@@ -335,6 +543,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
 
 	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
 	       /* NET_DM_ATTR_PC */
 	       nla_total_size(sizeof(u64)) +
 	       /* NET_DM_ATTR_SYMBOL */
@@ -351,7 +561,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	       nla_total_size(payload_len);
 }
 
-static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+					    const char *name)
 {
 	struct nlattr *attr;
 
@@ -363,6 +574,9 @@ static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
 	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
 		goto nla_put_failure;
 
+	if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+		goto nla_put_failure;
+
 	nla_nest_end(msg, attr);
 
 	return 0;
@@ -387,6 +601,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	if (!hdr)
 		return -EMSGSIZE;
 
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+		goto nla_put_failure;
+
 	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
 		goto nla_put_failure;
 
@@ -394,7 +611,7 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
 		goto nla_put_failure;
 
-	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
 	if (rc)
 		goto nla_put_failure;
 
@@ -481,10 +698,250 @@ static void net_dm_packet_work(struct work_struct *work)
 		net_dm_packet_report(skb);
 }
 
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+			     const struct net_dm_hw_metadata *hw_metadata)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+	       /* NET_DM_ATTR_HW_TRAP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+					struct sk_buff *skb, size_t payload_len)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct nlattr *attr;
+	struct timespec ts;
+	void *hdr;
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+			   hw_metadata->trap_group_name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+			   hw_metadata->trap_name))
+		goto nla_put_failure;
+
+	if (hw_metadata->input_dev) {
+		struct net_device *dev = hw_metadata->input_dev;
+		int rc;
+
+		rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+						      dev->name);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_metadata *n_hw_metadata;
+	const char *trap_group_name;
+	const char *trap_name;
+
+	n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+	if (!n_hw_metadata)
+		return NULL;
+
+	trap_group_name = kmemdup(hw_metadata->trap_group_name,
+				  strlen(hw_metadata->trap_group_name) + 1,
+				  GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_group_name)
+		goto free_hw_metadata;
+	n_hw_metadata->trap_group_name = trap_group_name;
+
+	trap_name = kmemdup(hw_metadata->trap_name,
+			    strlen(hw_metadata->trap_name) + 1,
+			    GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_name)
+		goto free_trap_group;
+	n_hw_metadata->trap_name = trap_name;
+
+	n_hw_metadata->input_dev = hw_metadata->input_dev;
+	if (n_hw_metadata->input_dev)
+		dev_hold(n_hw_metadata->input_dev);
+
+	return n_hw_metadata;
+
+free_trap_group:
+	kfree(trap_group_name);
+free_hw_metadata:
+	kfree(n_hw_metadata);
+	return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+	if (hw_metadata->input_dev)
+		dev_put(hw_metadata->input_dev);
+	kfree(hw_metadata->trap_name);
+	kfree(hw_metadata->trap_group_name);
+	kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+	msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+			GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+	consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_hw_packet_report(skb);
+}
+
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+		       const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_metadata *n_hw_metadata;
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+	if (!n_hw_metadata)
+		goto free;
+
+	NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+	nskb->tstamp = tstamp;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+		__skb_queue_tail(&hw_data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	schedule_work(&hw_data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+	u64_stats_update_begin(&hw_data->stats.syncp);
+	hw_data->stats.dropped++;
+	u64_stats_update_end(&hw_data->stats.syncp);
+	net_dm_hw_metadata_free(n_hw_metadata);
+free:
+	consume_skb(nskb);
+}
+
 static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
 	.kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
 	.napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
 	.work_item_func = net_dm_packet_work,
+	.hw_work_item_func = net_dm_hw_packet_work,
+	.hw_probe = net_dm_hw_packet_probe,
 };
 
 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
@@ -492,6 +949,85 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
 	[NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
 };
 
+void net_dm_hw_report(struct sk_buff *skb,
+		      const struct net_dm_hw_metadata *hw_metadata)
+{
+	rcu_read_lock();
+
+	if (!monitor_hw)
+		goto out;
+
+	net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+	const struct net_dm_alert_ops *ops;
+	int cpu;
+
+	if (monitor_hw) {
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+		return -EAGAIN;
+	}
+
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_hw_entries *hw_entries;
+
+		INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+		timer_setup(&hw_data->send_timer, sched_send_work, 0);
+		hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+		kfree(hw_entries);
+	}
+
+	monitor_hw = true;
+
+	return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+	int cpu;
+
+	if (!monitor_hw)
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+
+	monitor_hw = false;
+
+	/* After this call returns we are guaranteed that no CPU is processing
+	 * any hardware drops.
+	 */
+	synchronize_rcu();
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct sk_buff *skb;
+
+		del_timer_sync(&hw_data->send_timer);
+		cancel_work_sync(&hw_data->dm_alert_work);
+		while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+			struct net_dm_hw_metadata *hw_metadata;
+
+			hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+			net_dm_hw_metadata_free(hw_metadata);
+			consume_skb(skb);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
 	const struct net_dm_alert_ops *ops;
@@ -604,6 +1140,11 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 	return rc;
 }
 
+static bool net_dm_is_monitoring(void)
+{
+	return trace_state == TRACE_ON || monitor_hw;
+}
+
 static int net_dm_alert_mode_get_from_info(struct genl_info *info,
 					   enum net_dm_alert_mode *p_alert_mode)
 {
@@ -665,8 +1206,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 	struct netlink_ext_ack *extack = info->extack;
 	int rc;
 
-	if (trace_state == TRACE_ON) {
-		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+	if (net_dm_is_monitoring()) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
 		return -EBUSY;
 	}
 
@@ -681,14 +1222,61 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 	return 0;
 }
 
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	bool sw_set = false;
+	int rc;
+
+	if (set_sw) {
+		rc = set_all_monitor_traces(TRACE_ON, extack);
+		if (rc)
+			return rc;
+		sw_set = true;
+	}
+
+	if (set_hw) {
+		rc = net_dm_hw_monitor_start(extack);
+		if (rc)
+			goto err_monitor_hw;
+	}
+
+	return 0;
+
+err_monitor_hw:
+	if (sw_set)
+		set_all_monitor_traces(TRACE_OFF, extack);
+	return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	if (set_hw)
+		net_dm_hw_monitor_stop(extack);
+	if (set_sw)
+		set_all_monitor_traces(TRACE_OFF, extack);
+}
+
 static int net_dm_cmd_trace(struct sk_buff *skb,
 			    struct genl_info *info)
 {
+	bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+	bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+	struct netlink_ext_ack *extack = info->extack;
+
+	/* To maintain backward compatibility, we start / stop monitoring of
+	 * software drops if no flag is specified.
+	 */
+	if (!set_sw && !set_hw)
+		set_sw = true;
+
 	switch (info->genlhdr->cmd) {
 	case NET_DM_CMD_START:
-		return set_all_monitor_traces(TRACE_ON, info->extack);
+		return net_dm_monitor_start(set_sw, set_hw, extack);
 	case NET_DM_CMD_STOP:
-		return set_all_monitor_traces(TRACE_OFF, info->extack);
+		net_dm_monitor_stop(set_sw, set_hw, extack);
+		return 0;
 	}
 
 	return -EOPNOTSUPP;
@@ -785,6 +1373,50 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &hw_data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_hw_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
 static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
 {
 	void *hdr;
@@ -799,6 +1431,10 @@ static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
 	if (rc)
 		goto nla_put_failure;
 
+	rc = net_dm_hw_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 
 	return 0;
@@ -872,6 +1508,8 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
 	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
 	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG },
+	[NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
 };
 
 static const struct genl_ops dropmon_ops[] = {
@@ -934,9 +1572,57 @@ static struct notifier_block dropmon_net_notifier = {
 	.notifier_call = dropmon_net_event
 };
 
-static int __init init_net_drop_monitor(void)
+static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
+{
+	spin_lock_init(&data->lock);
+	skb_queue_head_init(&data->drop_queue);
+	u64_stats_init(&data->stats.syncp);
+}
+
+static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
+{
+	WARN_ON(!skb_queue_empty(&data->drop_queue));
+}
+
+static void net_dm_cpu_data_init(int cpu)
 {
 	struct per_cpu_dm_data *data;
+
+	data = &per_cpu(dm_cpu_data, cpu);
+	__net_dm_cpu_data_init(data);
+}
+
+static void net_dm_cpu_data_fini(int cpu)
+{
+	struct per_cpu_dm_data *data;
+
+	data = &per_cpu(dm_cpu_data, cpu);
+	/* At this point, we should have exclusive access
+	 * to this struct and can free the skb inside it.
+	 */
+	consume_skb(data->skb);
+	__net_dm_cpu_data_fini(data);
+}
+
+static void net_dm_hw_cpu_data_init(int cpu)
+{
+	struct per_cpu_dm_data *hw_data;
+
+	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+	__net_dm_cpu_data_init(hw_data);
+}
+
+static void net_dm_hw_cpu_data_fini(int cpu)
+{
+	struct per_cpu_dm_data *hw_data;
+
+	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+	kfree(hw_data->hw_entries);
+	__net_dm_cpu_data_fini(hw_data);
+}
+
+static int __init init_net_drop_monitor(void)
+{
 	int cpu, rc;
 
 	pr_info("Initializing network drop monitor service\n");
@@ -962,10 +1648,8 @@ static int __init init_net_drop_monitor(void)
 	rc = 0;
 
 	for_each_possible_cpu(cpu) {
-		data = &per_cpu(dm_cpu_data, cpu);
-		spin_lock_init(&data->lock);
-		skb_queue_head_init(&data->drop_queue);
-		u64_stats_init(&data->stats.syncp);
+		net_dm_cpu_data_init(cpu);
+		net_dm_hw_cpu_data_init(cpu);
 	}
 
 	goto out;
@@ -978,7 +1662,6 @@ out:
978 1662
979static void exit_net_drop_monitor(void) 1663static void exit_net_drop_monitor(void)
980{ 1664{
981 struct per_cpu_dm_data *data;
982 int cpu; 1665 int cpu;
983 1666
984 BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); 1667 BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
@@ -989,13 +1672,8 @@ static void exit_net_drop_monitor(void)
 	 */
 
 	for_each_possible_cpu(cpu) {
-		data = &per_cpu(dm_cpu_data, cpu);
-		/*
-		 * At this point, we should have exclusive access
-		 * to this struct and can free the skb inside it
-		 */
-		kfree_skb(data->skb);
-		WARN_ON(!skb_queue_empty(&data->drop_queue));
+		net_dm_hw_cpu_data_fini(cpu);
+		net_dm_cpu_data_fini(cpu);
 	}
 
 	BUG_ON(genl_unregister_family(&net_drop_monitor_family));
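
The exported net_dm_hw_report() above is the entry point a device driver (or the
devlink trap infrastructure) would call for a packet its hardware dropped. A
minimal, hypothetical driver-side sketch follows; the trap names and my_dev are
made up for illustration, while net_dm_hw_report() and the net_dm_hw_metadata
fields (trap_group_name, trap_name, input_dev) come from the patch itself:

#include <net/drop_monitor.h>

/* Hypothetical example: report a packet the hardware dropped.
 * net_dm_hw_report() is a no-op unless monitoring was started with the
 * NET_DM_ATTR_HW_DROPS flag; in packet alert mode the probe clones the
 * skb, so the caller keeps ownership of 'skb'.
 */
static void my_driver_report_hw_drop(struct sk_buff *skb,
				     struct net_device *my_dev)
{
	struct net_dm_hw_metadata hw_metadata = {
		.trap_group_name = "l2_drops",		/* example group */
		.trap_name = "ingress_vlan_filter",	/* example trap */
		.input_dev = my_dev,			/* ingress port */
	};

	net_dm_hw_report(skb, &hw_metadata);
}

From user space, hardware monitoring is started by sending NET_DM_CMD_START
with the new NET_DM_ATTR_HW_DROPS flag; omitting both flags keeps the old
software-only behavior, per the compatibility comment in net_dm_cmd_trace().
A rough libnl-genl sketch, assuming the "NET_DM" generic netlink family name
and the uapi constants from <linux/net_dropmon.h>:

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/net_dropmon.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk))
		return 1;
	/* Resolve the drop monitor generic netlink family by name. */
	family = genl_ctrl_resolve(sk, "NET_DM");
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NET_DM_CMD_START, 0);
	/* Request hardware drops only; add NET_DM_ATTR_SW_DROPS for both. */
	nla_put_flag(msg, NET_DM_ATTR_HW_DROPS);
	nl_send_auto(sk, msg);
	nl_wait_for_ack(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}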