aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2014-06-26 07:16:59 -0400
committerDavid S. Miller <davem@davemloft.net>2014-07-01 18:50:23 -0400
commit8788370a1d4b1d89efc1aea1b13ea5e5bfe10fde (patch)
tree38a4eba0c82e6b77747fdd32bba7be7d88f6cdb1 /net/core
parentbaac167b706600ebe7158acaeb7c489ae9d0bb8b (diff)
pktgen: RCU-ify "if_list" to remove lock in next_to_run()
The if_lock()/if_unlock() in next_to_run() adds a significant overhead, because its called for every packet in busy loop of pktgen_thread_worker(). (Thomas Graf originally pointed me at this lock problem). Removing these two "LOCK" operations should in theory save us approx 16ns (8ns x 2), as illustrated below we do save 16ns when removing the locks and introducing RCU protection. Performance data with CLONE_SKB==100000, TX-size=512, rx-usecs=30: (single CPU performance, ixgbe 10Gbit/s, E5-2630) * Prev : 5684009 pps --> 175.93ns (1/5684009*10^9) * RCU-fix: 6272204 pps --> 159.43ns (1/6272204*10^9) * Diff : +588195 pps --> -16.50ns To understand this RCU patch, I describe the pktgen thread model below. In pktgen there is several kernel threads, but there is only one CPU running each kernel thread. Communication with the kernel threads are done through some thread control flags. This allow the thread to change data structures at a know synchronization point, see main thread func pktgen_thread_worker(). Userspace changes are communicated through proc-file writes. There are three types of changes, general control changes "pgctrl" (func:pgctrl_write), thread changes "kpktgend_X" (func:pktgen_thread_write), and interface config changes "etcX@N" (func:pktgen_if_write). Userspace "pgctrl" and "thread" changes are synchronized via the mutex pktgen_thread_lock, thus only a single userspace instance can run. The mutex is taken while the packet generator is running, by pgctrl "start". Thus e.g. "add_device" cannot be invoked when pktgen is running/started. All "pgctrl" and all "thread" changes, except thread "add_device", communicate via the thread control flags. The main problem is the exception "add_device", that modifies threads "if_list" directly. Fortunately "add_device" cannot be invoked while pktgen is running. But there exists a race between "rem_device_all" and "add_device" (which normally don't occur, because "rem_device_all" waits 125ms before returning). Background'ing "rem_device_all" and running "add_device" immediately allow the race to occur. The race affects the threads (list of devices) "if_list". The if_lock is used for protecting this "if_list". Other readers are given lock-free access to the list under RCU read sections. Note, interface config changes (via proc) can occur while pktgen is running, which worries me a bit. I'm assuming proc_remove() takes appropriate locks, to assure no writers exists after proc_remove() finish. I've been running a script exercising the race condition (leading me to fix the proc_remove order), without any issues. The script also exercises concurrent proc writes, while the interface config is getting removed. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Reviewed-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/pktgen.c101
1 files changed, 52 insertions, 49 deletions
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b61f553689b6..5b05e364b8ae 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,8 +69,9 @@
69 * for running devices in the if_list and sends packets until count is 0 it 69 * for running devices in the if_list and sends packets until count is 0 it
70 * also the thread checks the thread->control which is used for inter-process 70 * also the thread checks the thread->control which is used for inter-process
71 * communication. controlling process "posts" operations to the threads this 71 * communication. controlling process "posts" operations to the threads this
72 * way. The if_lock should be possible to remove when add/rem_device is merged 72 * way.
73 * into this too. 73 * The if_list is RCU protected, and the if_lock remains to protect updating
74 * of if_list, from "add_device" as it invoked from userspace (via proc write).
74 * 75 *
75 * By design there should only be *one* "controlling" process. In practice 76 * By design there should only be *one* "controlling" process. In practice
76 * multiple write accesses gives unpredictable result. Understood by "write" 77 * multiple write accesses gives unpredictable result. Understood by "write"
@@ -208,7 +209,7 @@
208#define T_REMDEVALL (1<<2) /* Remove all devs */ 209#define T_REMDEVALL (1<<2) /* Remove all devs */
209#define T_REMDEV (1<<3) /* Remove one dev */ 210#define T_REMDEV (1<<3) /* Remove one dev */
210 211
211/* If lock -- can be removed after some work */ 212/* If lock -- protects updating of if_list */
212#define if_lock(t) spin_lock(&(t->if_lock)); 213#define if_lock(t) spin_lock(&(t->if_lock));
213#define if_unlock(t) spin_unlock(&(t->if_lock)); 214#define if_unlock(t) spin_unlock(&(t->if_lock));
214 215
@@ -241,6 +242,7 @@ struct pktgen_dev {
241 struct proc_dir_entry *entry; /* proc file */ 242 struct proc_dir_entry *entry; /* proc file */
242 struct pktgen_thread *pg_thread;/* the owner */ 243 struct pktgen_thread *pg_thread;/* the owner */
243 struct list_head list; /* chaining in the thread's run-queue */ 244 struct list_head list; /* chaining in the thread's run-queue */
245 struct rcu_head rcu; /* freed by RCU */
244 246
245 int running; /* if false, the test will stop */ 247 int running; /* if false, the test will stop */
246 248
@@ -1737,14 +1739,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1737 1739
1738 seq_puts(seq, "Running: "); 1740 seq_puts(seq, "Running: ");
1739 1741
1740 if_lock(t); 1742 rcu_read_lock();
1741 list_for_each_entry(pkt_dev, &t->if_list, list) 1743 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
1742 if (pkt_dev->running) 1744 if (pkt_dev->running)
1743 seq_printf(seq, "%s ", pkt_dev->odevname); 1745 seq_printf(seq, "%s ", pkt_dev->odevname);
1744 1746
1745 seq_puts(seq, "\nStopped: "); 1747 seq_puts(seq, "\nStopped: ");
1746 1748
1747 list_for_each_entry(pkt_dev, &t->if_list, list) 1749 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
1748 if (!pkt_dev->running) 1750 if (!pkt_dev->running)
1749 seq_printf(seq, "%s ", pkt_dev->odevname); 1751 seq_printf(seq, "%s ", pkt_dev->odevname);
1750 1752
@@ -1753,7 +1755,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1753 else 1755 else
1754 seq_puts(seq, "\nResult: NA\n"); 1756 seq_puts(seq, "\nResult: NA\n");
1755 1757
1756 if_unlock(t); 1758 rcu_read_unlock();
1757 1759
1758 return 0; 1760 return 0;
1759} 1761}
@@ -1878,10 +1880,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
1878 pkt_dev = pktgen_find_dev(t, ifname, exact); 1880 pkt_dev = pktgen_find_dev(t, ifname, exact);
1879 if (pkt_dev) { 1881 if (pkt_dev) {
1880 if (remove) { 1882 if (remove) {
1881 if_lock(t);
1882 pkt_dev->removal_mark = 1; 1883 pkt_dev->removal_mark = 1;
1883 t->control |= T_REMDEV; 1884 t->control |= T_REMDEV;
1884 if_unlock(t);
1885 } 1885 }
1886 break; 1886 break;
1887 } 1887 }
@@ -1931,7 +1931,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1931 list_for_each_entry(t, &pn->pktgen_threads, th_list) { 1931 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1932 struct pktgen_dev *pkt_dev; 1932 struct pktgen_dev *pkt_dev;
1933 1933
1934 list_for_each_entry(pkt_dev, &t->if_list, list) { 1934 rcu_read_lock();
1935 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
1935 if (pkt_dev->odev != dev) 1936 if (pkt_dev->odev != dev)
1936 continue; 1937 continue;
1937 1938
@@ -1946,6 +1947,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1946 dev->name); 1947 dev->name);
1947 break; 1948 break;
1948 } 1949 }
1950 rcu_read_unlock();
1949 } 1951 }
1950} 1952}
1951 1953
@@ -2997,8 +2999,8 @@ static void pktgen_run(struct pktgen_thread *t)
2997 2999
2998 func_enter(); 3000 func_enter();
2999 3001
3000 if_lock(t); 3002 rcu_read_lock();
3001 list_for_each_entry(pkt_dev, &t->if_list, list) { 3003 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3002 3004
3003 /* 3005 /*
3004 * setup odev and create initial packet. 3006 * setup odev and create initial packet.
@@ -3007,18 +3009,18 @@ static void pktgen_run(struct pktgen_thread *t)
3007 3009
3008 if (pkt_dev->odev) { 3010 if (pkt_dev->odev) {
3009 pktgen_clear_counters(pkt_dev); 3011 pktgen_clear_counters(pkt_dev);
3010 pkt_dev->running = 1; /* Cranke yeself! */
3011 pkt_dev->skb = NULL; 3012 pkt_dev->skb = NULL;
3012 pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); 3013 pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
3013 3014
3014 set_pkt_overhead(pkt_dev); 3015 set_pkt_overhead(pkt_dev);
3015 3016
3016 strcpy(pkt_dev->result, "Starting"); 3017 strcpy(pkt_dev->result, "Starting");
3018 pkt_dev->running = 1; /* Cranke yeself! */
3017 started++; 3019 started++;
3018 } else 3020 } else
3019 strcpy(pkt_dev->result, "Error starting"); 3021 strcpy(pkt_dev->result, "Error starting");
3020 } 3022 }
3021 if_unlock(t); 3023 rcu_read_unlock();
3022 if (started) 3024 if (started)
3023 t->control &= ~(T_STOP); 3025 t->control &= ~(T_STOP);
3024} 3026}
@@ -3041,27 +3043,25 @@ static int thread_is_running(const struct pktgen_thread *t)
3041{ 3043{
3042 const struct pktgen_dev *pkt_dev; 3044 const struct pktgen_dev *pkt_dev;
3043 3045
3044 list_for_each_entry(pkt_dev, &t->if_list, list) 3046 rcu_read_lock();
3045 if (pkt_dev->running) 3047 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
3048 if (pkt_dev->running) {
3049 rcu_read_unlock();
3046 return 1; 3050 return 1;
3051 }
3052 rcu_read_unlock();
3047 return 0; 3053 return 0;
3048} 3054}
3049 3055
3050static int pktgen_wait_thread_run(struct pktgen_thread *t) 3056static int pktgen_wait_thread_run(struct pktgen_thread *t)
3051{ 3057{
3052 if_lock(t);
3053
3054 while (thread_is_running(t)) { 3058 while (thread_is_running(t)) {
3055 3059
3056 if_unlock(t);
3057
3058 msleep_interruptible(100); 3060 msleep_interruptible(100);
3059 3061
3060 if (signal_pending(current)) 3062 if (signal_pending(current))
3061 goto signal; 3063 goto signal;
3062 if_lock(t);
3063 } 3064 }
3064 if_unlock(t);
3065 return 1; 3065 return 1;
3066signal: 3066signal:
3067 return 0; 3067 return 0;
@@ -3166,10 +3166,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3166 return -EINVAL; 3166 return -EINVAL;
3167 } 3167 }
3168 3168
3169 pkt_dev->running = 0;
3169 kfree_skb(pkt_dev->skb); 3170 kfree_skb(pkt_dev->skb);
3170 pkt_dev->skb = NULL; 3171 pkt_dev->skb = NULL;
3171 pkt_dev->stopped_at = ktime_get(); 3172 pkt_dev->stopped_at = ktime_get();
3172 pkt_dev->running = 0;
3173 3173
3174 show_results(pkt_dev, nr_frags); 3174 show_results(pkt_dev, nr_frags);
3175 3175
@@ -3180,9 +3180,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3180{ 3180{
3181 struct pktgen_dev *pkt_dev, *best = NULL; 3181 struct pktgen_dev *pkt_dev, *best = NULL;
3182 3182
3183 if_lock(t); 3183 rcu_read_lock();
3184 3184 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3185 list_for_each_entry(pkt_dev, &t->if_list, list) {
3186 if (!pkt_dev->running) 3185 if (!pkt_dev->running)
3187 continue; 3186 continue;
3188 if (best == NULL) 3187 if (best == NULL)
@@ -3190,7 +3189,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3190 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) 3189 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
3191 best = pkt_dev; 3190 best = pkt_dev;
3192 } 3191 }
3193 if_unlock(t); 3192 rcu_read_unlock();
3193
3194 return best; 3194 return best;
3195} 3195}
3196 3196
@@ -3200,13 +3200,13 @@ static void pktgen_stop(struct pktgen_thread *t)
3200 3200
3201 func_enter(); 3201 func_enter();
3202 3202
3203 if_lock(t); 3203 rcu_read_lock();
3204 3204
3205 list_for_each_entry(pkt_dev, &t->if_list, list) { 3205 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3206 pktgen_stop_device(pkt_dev); 3206 pktgen_stop_device(pkt_dev);
3207 } 3207 }
3208 3208
3209 if_unlock(t); 3209 rcu_read_unlock();
3210} 3210}
3211 3211
3212/* 3212/*
@@ -3220,8 +3220,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3220 3220
3221 func_enter(); 3221 func_enter();
3222 3222
3223 if_lock(t);
3224
3225 list_for_each_safe(q, n, &t->if_list) { 3223 list_for_each_safe(q, n, &t->if_list) {
3226 cur = list_entry(q, struct pktgen_dev, list); 3224 cur = list_entry(q, struct pktgen_dev, list);
3227 3225
@@ -3235,8 +3233,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3235 3233
3236 break; 3234 break;
3237 } 3235 }
3238
3239 if_unlock(t);
3240} 3236}
3241 3237
3242static void pktgen_rem_all_ifs(struct pktgen_thread *t) 3238static void pktgen_rem_all_ifs(struct pktgen_thread *t)
@@ -3248,8 +3244,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3248 3244
3249 /* Remove all devices, free mem */ 3245 /* Remove all devices, free mem */
3250 3246
3251 if_lock(t);
3252
3253 list_for_each_safe(q, n, &t->if_list) { 3247 list_for_each_safe(q, n, &t->if_list) {
3254 cur = list_entry(q, struct pktgen_dev, list); 3248 cur = list_entry(q, struct pktgen_dev, list);
3255 3249
@@ -3258,8 +3252,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3258 3252
3259 pktgen_remove_device(t, cur); 3253 pktgen_remove_device(t, cur);
3260 } 3254 }
3261
3262 if_unlock(t);
3263} 3255}
3264 3256
3265static void pktgen_rem_thread(struct pktgen_thread *t) 3257static void pktgen_rem_thread(struct pktgen_thread *t)
@@ -3482,8 +3474,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3482 struct pktgen_dev *p, *pkt_dev = NULL; 3474 struct pktgen_dev *p, *pkt_dev = NULL;
3483 size_t len = strlen(ifname); 3475 size_t len = strlen(ifname);
3484 3476
3485 if_lock(t); 3477 rcu_read_lock();
3486 list_for_each_entry(p, &t->if_list, list) 3478 list_for_each_entry_rcu(p, &t->if_list, list)
3487 if (strncmp(p->odevname, ifname, len) == 0) { 3479 if (strncmp(p->odevname, ifname, len) == 0) {
3488 if (p->odevname[len]) { 3480 if (p->odevname[len]) {
3489 if (exact || p->odevname[len] != '@') 3481 if (exact || p->odevname[len] != '@')
@@ -3493,7 +3485,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3493 break; 3485 break;
3494 } 3486 }
3495 3487
3496 if_unlock(t); 3488 rcu_read_unlock();
3497 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); 3489 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
3498 return pkt_dev; 3490 return pkt_dev;
3499} 3491}
@@ -3507,6 +3499,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3507{ 3499{
3508 int rv = 0; 3500 int rv = 0;
3509 3501
3502 /* This function cannot be called concurrently, as its called
3503 * under pktgen_thread_lock mutex, but it can run from
3504 * userspace on another CPU than the kthread. The if_lock()
3505 * is used here to sync with concurrent instances of
3506 * _rem_dev_from_if_list() invoked via kthread, which is also
3507 * updating the if_list */
3510 if_lock(t); 3508 if_lock(t);
3511 3509
3512 if (pkt_dev->pg_thread) { 3510 if (pkt_dev->pg_thread) {
@@ -3515,9 +3513,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3515 goto out; 3513 goto out;
3516 } 3514 }
3517 3515
3518 list_add(&pkt_dev->list, &t->if_list);
3519 pkt_dev->pg_thread = t;
3520 pkt_dev->running = 0; 3516 pkt_dev->running = 0;
3517 pkt_dev->pg_thread = t;
3518 list_add_rcu(&pkt_dev->list, &t->if_list);
3521 3519
3522out: 3520out:
3523 if_unlock(t); 3521 if_unlock(t);
@@ -3672,11 +3670,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3672 struct list_head *q, *n; 3670 struct list_head *q, *n;
3673 struct pktgen_dev *p; 3671 struct pktgen_dev *p;
3674 3672
3673 if_lock(t);
3675 list_for_each_safe(q, n, &t->if_list) { 3674 list_for_each_safe(q, n, &t->if_list) {
3676 p = list_entry(q, struct pktgen_dev, list); 3675 p = list_entry(q, struct pktgen_dev, list);
3677 if (p == pkt_dev) 3676 if (p == pkt_dev)
3678 list_del(&p->list); 3677 list_del_rcu(&p->list);
3679 } 3678 }
3679 if_unlock(t);
3680} 3680}
3681 3681
3682static int pktgen_remove_device(struct pktgen_thread *t, 3682static int pktgen_remove_device(struct pktgen_thread *t,
@@ -3696,20 +3696,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3696 pkt_dev->odev = NULL; 3696 pkt_dev->odev = NULL;
3697 } 3697 }
3698 3698
3699 /* And update the thread if_list */ 3699 /* Remove proc before if_list entry, because add_device uses
3700 3700 * list to determine if interface already exist, avoid race
3701 _rem_dev_from_if_list(t, pkt_dev); 3701 * with proc_create_data() */
3702
3703 if (pkt_dev->entry) 3702 if (pkt_dev->entry)
3704 proc_remove(pkt_dev->entry); 3703 proc_remove(pkt_dev->entry);
3705 3704
3705 /* And update the thread if_list */
3706 _rem_dev_from_if_list(t, pkt_dev);
3707
3706#ifdef CONFIG_XFRM 3708#ifdef CONFIG_XFRM
3707 free_SAs(pkt_dev); 3709 free_SAs(pkt_dev);
3708#endif 3710#endif
3709 vfree(pkt_dev->flows); 3711 vfree(pkt_dev->flows);
3710 if (pkt_dev->page) 3712 if (pkt_dev->page)
3711 put_page(pkt_dev->page); 3713 put_page(pkt_dev->page);
3712 kfree(pkt_dev); 3714 kfree_rcu(pkt_dev, rcu);
3713 return 0; 3715 return 0;
3714} 3716}
3715 3717
@@ -3809,6 +3811,7 @@ static void __exit pg_cleanup(void)
3809{ 3811{
3810 unregister_netdevice_notifier(&pktgen_notifier_block); 3812 unregister_netdevice_notifier(&pktgen_notifier_block);
3811 unregister_pernet_subsys(&pg_net_ops); 3813 unregister_pernet_subsys(&pg_net_ops);
3814 /* Don't need rcu_barrier() due to use of kfree_rcu() */
3812} 3815}
3813 3816
3814module_init(pg_init); 3817module_init(pg_init);