Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2
-rw-r--r-- | kernel/irq/chip.c | 63
-rw-r--r-- | kernel/irq/migration.c | 34
-rw-r--r-- | kernel/rcupdate.c | 11
-rw-r--r-- | kernel/rcutorture.c | 317
-rw-r--r-- | kernel/srcu.c | 258
-rw-r--r-- | kernel/sys.c | 125
7 files changed, 773 insertions, 37 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d948ca12acf0..5e3f3b75563a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o latency.o nsproxy.o | 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o |
12 | 12 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
14 | obj-y += time/ | 14 | obj-y += time/ |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 736cb0bd498f..4cf65f5c6a74 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -18,6 +18,69 @@ | |||
18 | #include "internals.h" | 18 | #include "internals.h" |
19 | 19 | ||
20 | /** | 20 | /** |
21 | * dynamic_irq_init - initialize a dynamically allocated irq | ||
22 | * @irq: irq number to initialize | ||
23 | */ | ||
24 | void dynamic_irq_init(unsigned int irq) | ||
25 | { | ||
26 | struct irq_desc *desc; | ||
27 | unsigned long flags; | ||
28 | |||
29 | if (irq >= NR_IRQS) { | ||
30 | printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); | ||
31 | WARN_ON(1); | ||
32 | return; | ||
33 | } | ||
34 | |||
35 | /* Ensure we don't have left over values from a previous use of this irq */ | ||
36 | desc = irq_desc + irq; | ||
37 | spin_lock_irqsave(&desc->lock, flags); | ||
38 | desc->status = IRQ_DISABLED; | ||
39 | desc->chip = &no_irq_chip; | ||
40 | desc->handle_irq = handle_bad_irq; | ||
41 | desc->depth = 1; | ||
42 | desc->handler_data = NULL; | ||
43 | desc->chip_data = NULL; | ||
44 | desc->action = NULL; | ||
45 | desc->irq_count = 0; | ||
46 | desc->irqs_unhandled = 0; | ||
47 | #ifdef CONFIG_SMP | ||
48 | desc->affinity = CPU_MASK_ALL; | ||
49 | #endif | ||
50 | spin_unlock_irqrestore(&desc->lock, flags); | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
55 | * @irq: irq number to initialize | ||
56 | */ | ||
57 | void dynamic_irq_cleanup(unsigned int irq) | ||
58 | { | ||
59 | struct irq_desc *desc; | ||
60 | unsigned long flags; | ||
61 | |||
62 | if (irq >= NR_IRQS) { | ||
63 | printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); | ||
64 | WARN_ON(1); | ||
65 | return; | ||
66 | } | ||
67 | |||
68 | desc = irq_desc + irq; | ||
69 | spin_lock_irqsave(&desc->lock, flags); | ||
70 | if (desc->action) { | ||
71 | spin_unlock_irqrestore(&desc->lock, flags); | ||
72 | printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n", | ||
73 | irq); | ||
74 | WARN_ON(1); | ||
75 | return; | ||
76 | } | ||
77 | desc->handle_irq = handle_bad_irq; | ||
78 | desc->chip = &no_irq_chip; | ||
79 | spin_unlock_irqrestore(&desc->lock, flags); | ||
80 | } | ||
81 | |||
82 | |||
83 | /** | ||
21 | * set_irq_chip - set the irq chip for an irq | 84 | * set_irq_chip - set the irq chip for an irq |
22 | * @irq: irq number | 85 | * @irq: irq number |
23 | * @chip: pointer to irq chip description structure | 86 | * @chip: pointer to irq chip description structure |
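The new dynamic_irq_init()/dynamic_irq_cleanup() helpers give a common way to reset an irq_desc when an interrupt number is allocated and released dynamically (MSI being the obvious user). A minimal sketch of the intended call pattern follows; example_setup_irq(), example_teardown_irq() and example_msi_chip are hypothetical names, not part of this patch, and the irq number itself is assumed to come from an arch-specific vector allocator.

#include <linux/irq.h>

static struct irq_chip example_msi_chip;	/* hypothetical chip, callbacks omitted */

/* Sketch only: irq comes from a hypothetical arch-specific allocator. */
static int example_setup_irq(unsigned int irq)
{
	dynamic_irq_init(irq);		/* wipe any stale irq_desc state */
	set_irq_chip_and_handler(irq, &example_msi_chip, handle_edge_irq);
	return 0;
}

static void example_teardown_irq(unsigned int irq)
{
	dynamic_irq_cleanup(irq);	/* back to no_irq_chip / handle_bad_irq */
}

After dynamic_irq_init() the descriptor deliberately points at no_irq_chip and handle_bad_irq, so the caller installs the real chip and flow handler before requesting the interrupt.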
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index a57ebe9fa6f6..4baa3bbcd25a 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -7,17 +7,17 @@ void set_pending_irq(unsigned int irq, cpumask_t mask) | |||
7 | unsigned long flags; | 7 | unsigned long flags; |
8 | 8 | ||
9 | spin_lock_irqsave(&desc->lock, flags); | 9 | spin_lock_irqsave(&desc->lock, flags); |
10 | desc->move_irq = 1; | 10 | desc->status |= IRQ_MOVE_PENDING; |
11 | irq_desc[irq].pending_mask = mask; | 11 | irq_desc[irq].pending_mask = mask; |
12 | spin_unlock_irqrestore(&desc->lock, flags); | 12 | spin_unlock_irqrestore(&desc->lock, flags); |
13 | } | 13 | } |
14 | 14 | ||
15 | void move_native_irq(int irq) | 15 | void move_masked_irq(int irq) |
16 | { | 16 | { |
17 | struct irq_desc *desc = irq_desc + irq; | 17 | struct irq_desc *desc = irq_desc + irq; |
18 | cpumask_t tmp; | 18 | cpumask_t tmp; |
19 | 19 | ||
20 | if (likely(!desc->move_irq)) | 20 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
21 | return; | 21 | return; |
22 | 22 | ||
23 | /* | 23 | /* |
@@ -28,7 +28,7 @@ void move_native_irq(int irq) | |||
28 | return; | 28 | return; |
29 | } | 29 | } |
30 | 30 | ||
31 | desc->move_irq = 0; | 31 | desc->status &= ~IRQ_MOVE_PENDING; |
32 | 32 | ||
33 | if (unlikely(cpus_empty(irq_desc[irq].pending_mask))) | 33 | if (unlikely(cpus_empty(irq_desc[irq].pending_mask))) |
34 | return; | 34 | return; |
@@ -48,15 +48,29 @@ void move_native_irq(int irq) | |||
48 | * when an active trigger is comming in. This could | 48 | * when an active trigger is comming in. This could |
49 | * cause some ioapics to mal-function. | 49 | * cause some ioapics to mal-function. |
50 | * Being paranoid i guess! | 50 | * Being paranoid i guess! |
51 | * | ||
52 | * For correct operation this depends on the caller | ||
53 | * masking the irqs. | ||
51 | */ | 54 | */ |
52 | if (likely(!cpus_empty(tmp))) { | 55 | if (likely(!cpus_empty(tmp))) { |
53 | if (likely(!(desc->status & IRQ_DISABLED))) | ||
54 | desc->chip->disable(irq); | ||
55 | |||
56 | desc->chip->set_affinity(irq,tmp); | 56 | desc->chip->set_affinity(irq,tmp); |
57 | |||
58 | if (likely(!(desc->status & IRQ_DISABLED))) | ||
59 | desc->chip->enable(irq); | ||
60 | } | 57 | } |
61 | cpus_clear(irq_desc[irq].pending_mask); | 58 | cpus_clear(irq_desc[irq].pending_mask); |
62 | } | 59 | } |
60 | |||
61 | void move_native_irq(int irq) | ||
62 | { | ||
63 | struct irq_desc *desc = irq_desc + irq; | ||
64 | |||
65 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | ||
66 | return; | ||
67 | |||
68 | if (likely(!(desc->status & IRQ_DISABLED))) | ||
69 | desc->chip->disable(irq); | ||
70 | |||
71 | move_masked_irq(irq); | ||
72 | |||
73 | if (likely(!(desc->status & IRQ_DISABLED))) | ||
74 | desc->chip->enable(irq); | ||
75 | } | ||
76 | |||
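The key change in migration.c is the split: move_masked_irq() now only applies the pending affinity and relies on the caller to have masked the interrupt, while the new move_native_irq() wrapper reproduces the old behaviour by bracketing it with chip->disable()/chip->enable(). A minimal sketch of a caller that already holds the line masked is below; example_migrate_while_masked() is an illustrative name, not code from this patch.

#include <linux/irq.h>

/*
 * Sketch only: a path that has already masked the interrupt (for example
 * while acknowledging it) can call move_masked_irq() directly and skip
 * the extra disable/enable pair done by move_native_irq().
 */
static void example_migrate_while_masked(unsigned int irq)
{
	struct irq_desc *desc = irq_desc + irq;

	desc->chip->mask(irq);		/* interrupt cannot fire past this point */
	move_masked_irq(irq);		/* apply affinity queued by set_pending_irq() */
	desc->chip->unmask(irq);	/* resume delivery on the new CPU set */
}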
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 523e46483b99..26bb5ffe1ef1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -71,9 +71,6 @@ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; | |||
71 | static int blimit = 10; | 71 | static int blimit = 10; |
72 | static int qhimark = 10000; | 72 | static int qhimark = 10000; |
73 | static int qlowmark = 100; | 73 | static int qlowmark = 100; |
74 | #ifdef CONFIG_SMP | ||
75 | static int rsinterval = 1000; | ||
76 | #endif | ||
77 | 74 | ||
78 | static atomic_t rcu_barrier_cpu_count; | 75 | static atomic_t rcu_barrier_cpu_count; |
79 | static DEFINE_MUTEX(rcu_barrier_mutex); | 76 | static DEFINE_MUTEX(rcu_barrier_mutex); |
@@ -86,8 +83,8 @@ static void force_quiescent_state(struct rcu_data *rdp, | |||
86 | int cpu; | 83 | int cpu; |
87 | cpumask_t cpumask; | 84 | cpumask_t cpumask; |
88 | set_need_resched(); | 85 | set_need_resched(); |
89 | if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { | 86 | if (unlikely(!rcp->signaled)) { |
90 | rdp->last_rs_qlen = rdp->qlen; | 87 | rcp->signaled = 1; |
91 | /* | 88 | /* |
92 | * Don't send IPI to itself. With irqs disabled, | 89 | * Don't send IPI to itself. With irqs disabled, |
93 | * rdp->cpu is the current cpu. | 90 | * rdp->cpu is the current cpu. |
@@ -301,6 +298,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) | |||
301 | smp_mb(); | 298 | smp_mb(); |
302 | cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); | 299 | cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); |
303 | 300 | ||
301 | rcp->signaled = 0; | ||
304 | } | 302 | } |
305 | } | 303 | } |
306 | 304 | ||
@@ -628,9 +626,6 @@ void synchronize_rcu(void) | |||
628 | module_param(blimit, int, 0); | 626 | module_param(blimit, int, 0); |
629 | module_param(qhimark, int, 0); | 627 | module_param(qhimark, int, 0); |
630 | module_param(qlowmark, int, 0); | 628 | module_param(qlowmark, int, 0); |
631 | #ifdef CONFIG_SMP | ||
632 | module_param(rsinterval, int, 0); | ||
633 | #endif | ||
634 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 629 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
635 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | 630 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); |
636 | EXPORT_SYMBOL_GPL(call_rcu); | 631 | EXPORT_SYMBOL_GPL(call_rcu); |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 23446e91cded..e2bda18f6f42 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -15,9 +15,10 @@ | |||
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 | * | 17 | * |
18 | * Copyright (C) IBM Corporation, 2005 | 18 | * Copyright (C) IBM Corporation, 2005, 2006 |
19 | * | 19 | * |
20 | * Authors: Paul E. McKenney <paulmck@us.ibm.com> | 20 | * Authors: Paul E. McKenney <paulmck@us.ibm.com> |
21 | * Josh Triplett <josh@freedesktop.org> | ||
21 | * | 22 | * |
22 | * See also: Documentation/RCU/torture.txt | 23 | * See also: Documentation/RCU/torture.txt |
23 | */ | 24 | */ |
@@ -44,19 +45,25 @@ | |||
44 | #include <linux/delay.h> | 45 | #include <linux/delay.h> |
45 | #include <linux/byteorder/swabb.h> | 46 | #include <linux/byteorder/swabb.h> |
46 | #include <linux/stat.h> | 47 | #include <linux/stat.h> |
48 | #include <linux/srcu.h> | ||
47 | 49 | ||
48 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
51 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | ||
52 | "Josh Triplett <josh@freedesktop.org>"); | ||
49 | 53 | ||
50 | static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ | 54 | static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ |
55 | static int nfakewriters = 4; /* # fake writer threads */ | ||
51 | static int stat_interval; /* Interval between stats, in seconds. */ | 56 | static int stat_interval; /* Interval between stats, in seconds. */ |
52 | /* Defaults to "only at end of test". */ | 57 | /* Defaults to "only at end of test". */ |
53 | static int verbose; /* Print more debug info. */ | 58 | static int verbose; /* Print more debug info. */ |
54 | static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ | 59 | static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ |
55 | static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ | 60 | static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ |
56 | static char *torture_type = "rcu"; /* What to torture. */ | 61 | static char *torture_type = "rcu"; /* What RCU implementation to torture. */ |
57 | 62 | ||
58 | module_param(nreaders, int, 0); | 63 | module_param(nreaders, int, 0); |
59 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | 64 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); |
65 | module_param(nfakewriters, int, 0); | ||
66 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | ||
60 | module_param(stat_interval, int, 0); | 67 | module_param(stat_interval, int, 0); |
61 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | 68 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); |
62 | module_param(verbose, bool, 0); | 69 | module_param(verbose, bool, 0); |
@@ -66,7 +73,7 @@ MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); | |||
66 | module_param(shuffle_interval, int, 0); | 73 | module_param(shuffle_interval, int, 0); |
67 | MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); | 74 | MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); |
68 | module_param(torture_type, charp, 0); | 75 | module_param(torture_type, charp, 0); |
69 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh)"); | 76 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); |
70 | 77 | ||
71 | #define TORTURE_FLAG "-torture:" | 78 | #define TORTURE_FLAG "-torture:" |
72 | #define PRINTK_STRING(s) \ | 79 | #define PRINTK_STRING(s) \ |
@@ -80,6 +87,7 @@ static char printk_buf[4096]; | |||
80 | 87 | ||
81 | static int nrealreaders; | 88 | static int nrealreaders; |
82 | static struct task_struct *writer_task; | 89 | static struct task_struct *writer_task; |
90 | static struct task_struct **fakewriter_tasks; | ||
83 | static struct task_struct **reader_tasks; | 91 | static struct task_struct **reader_tasks; |
84 | static struct task_struct *stats_task; | 92 | static struct task_struct *stats_task; |
85 | static struct task_struct *shuffler_task; | 93 | static struct task_struct *shuffler_task; |
@@ -104,11 +112,12 @@ static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = | |||
104 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = | 112 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = |
105 | { 0 }; | 113 | { 0 }; |
106 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; | 114 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; |
107 | atomic_t n_rcu_torture_alloc; | 115 | static atomic_t n_rcu_torture_alloc; |
108 | atomic_t n_rcu_torture_alloc_fail; | 116 | static atomic_t n_rcu_torture_alloc_fail; |
109 | atomic_t n_rcu_torture_free; | 117 | static atomic_t n_rcu_torture_free; |
110 | atomic_t n_rcu_torture_mberror; | 118 | static atomic_t n_rcu_torture_mberror; |
111 | atomic_t n_rcu_torture_error; | 119 | static atomic_t n_rcu_torture_error; |
120 | static struct list_head rcu_torture_removed; | ||
112 | 121 | ||
113 | /* | 122 | /* |
114 | * Allocate an element from the rcu_tortures pool. | 123 | * Allocate an element from the rcu_tortures pool. |
@@ -145,7 +154,7 @@ rcu_torture_free(struct rcu_torture *p) | |||
145 | 154 | ||
146 | struct rcu_random_state { | 155 | struct rcu_random_state { |
147 | unsigned long rrs_state; | 156 | unsigned long rrs_state; |
148 | unsigned long rrs_count; | 157 | long rrs_count; |
149 | }; | 158 | }; |
150 | 159 | ||
151 | #define RCU_RANDOM_MULT 39916801 /* prime */ | 160 | #define RCU_RANDOM_MULT 39916801 /* prime */ |
@@ -158,7 +167,7 @@ struct rcu_random_state { | |||
158 | * Crude but fast random-number generator. Uses a linear congruential | 167 | * Crude but fast random-number generator. Uses a linear congruential |
159 | * generator, with occasional help from get_random_bytes(). | 168 | * generator, with occasional help from get_random_bytes(). |
160 | */ | 169 | */ |
161 | static long | 170 | static unsigned long |
162 | rcu_random(struct rcu_random_state *rrsp) | 171 | rcu_random(struct rcu_random_state *rrsp) |
163 | { | 172 | { |
164 | long refresh; | 173 | long refresh; |
@@ -180,9 +189,11 @@ struct rcu_torture_ops { | |||
180 | void (*init)(void); | 189 | void (*init)(void); |
181 | void (*cleanup)(void); | 190 | void (*cleanup)(void); |
182 | int (*readlock)(void); | 191 | int (*readlock)(void); |
192 | void (*readdelay)(struct rcu_random_state *rrsp); | ||
183 | void (*readunlock)(int idx); | 193 | void (*readunlock)(int idx); |
184 | int (*completed)(void); | 194 | int (*completed)(void); |
185 | void (*deferredfree)(struct rcu_torture *p); | 195 | void (*deferredfree)(struct rcu_torture *p); |
196 | void (*sync)(void); | ||
186 | int (*stats)(char *page); | 197 | int (*stats)(char *page); |
187 | char *name; | 198 | char *name; |
188 | }; | 199 | }; |
@@ -198,6 +209,18 @@ static int rcu_torture_read_lock(void) __acquires(RCU) | |||
198 | return 0; | 209 | return 0; |
199 | } | 210 | } |
200 | 211 | ||
212 | static void rcu_read_delay(struct rcu_random_state *rrsp) | ||
213 | { | ||
214 | long delay; | ||
215 | const long longdelay = 200; | ||
216 | |||
217 | /* We want there to be long-running readers, but not all the time. */ | ||
218 | |||
219 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay); | ||
220 | if (!delay) | ||
221 | udelay(longdelay); | ||
222 | } | ||
223 | |||
201 | static void rcu_torture_read_unlock(int idx) __releases(RCU) | 224 | static void rcu_torture_read_unlock(int idx) __releases(RCU) |
202 | { | 225 | { |
203 | rcu_read_unlock(); | 226 | rcu_read_unlock(); |
@@ -239,13 +262,54 @@ static struct rcu_torture_ops rcu_ops = { | |||
239 | .init = NULL, | 262 | .init = NULL, |
240 | .cleanup = NULL, | 263 | .cleanup = NULL, |
241 | .readlock = rcu_torture_read_lock, | 264 | .readlock = rcu_torture_read_lock, |
265 | .readdelay = rcu_read_delay, | ||
242 | .readunlock = rcu_torture_read_unlock, | 266 | .readunlock = rcu_torture_read_unlock, |
243 | .completed = rcu_torture_completed, | 267 | .completed = rcu_torture_completed, |
244 | .deferredfree = rcu_torture_deferred_free, | 268 | .deferredfree = rcu_torture_deferred_free, |
269 | .sync = synchronize_rcu, | ||
245 | .stats = NULL, | 270 | .stats = NULL, |
246 | .name = "rcu" | 271 | .name = "rcu" |
247 | }; | 272 | }; |
248 | 273 | ||
274 | static void rcu_sync_torture_deferred_free(struct rcu_torture *p) | ||
275 | { | ||
276 | int i; | ||
277 | struct rcu_torture *rp; | ||
278 | struct rcu_torture *rp1; | ||
279 | |||
280 | cur_ops->sync(); | ||
281 | list_add(&p->rtort_free, &rcu_torture_removed); | ||
282 | list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) { | ||
283 | i = rp->rtort_pipe_count; | ||
284 | if (i > RCU_TORTURE_PIPE_LEN) | ||
285 | i = RCU_TORTURE_PIPE_LEN; | ||
286 | atomic_inc(&rcu_torture_wcount[i]); | ||
287 | if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { | ||
288 | rp->rtort_mbtest = 0; | ||
289 | list_del(&rp->rtort_free); | ||
290 | rcu_torture_free(rp); | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static void rcu_sync_torture_init(void) | ||
296 | { | ||
297 | INIT_LIST_HEAD(&rcu_torture_removed); | ||
298 | } | ||
299 | |||
300 | static struct rcu_torture_ops rcu_sync_ops = { | ||
301 | .init = rcu_sync_torture_init, | ||
302 | .cleanup = NULL, | ||
303 | .readlock = rcu_torture_read_lock, | ||
304 | .readdelay = rcu_read_delay, | ||
305 | .readunlock = rcu_torture_read_unlock, | ||
306 | .completed = rcu_torture_completed, | ||
307 | .deferredfree = rcu_sync_torture_deferred_free, | ||
308 | .sync = synchronize_rcu, | ||
309 | .stats = NULL, | ||
310 | .name = "rcu_sync" | ||
311 | }; | ||
312 | |||
249 | /* | 313 | /* |
250 | * Definitions for rcu_bh torture testing. | 314 | * Definitions for rcu_bh torture testing. |
251 | */ | 315 | */ |
@@ -271,19 +335,176 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | |||
271 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | 335 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); |
272 | } | 336 | } |
273 | 337 | ||
338 | struct rcu_bh_torture_synchronize { | ||
339 | struct rcu_head head; | ||
340 | struct completion completion; | ||
341 | }; | ||
342 | |||
343 | static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) | ||
344 | { | ||
345 | struct rcu_bh_torture_synchronize *rcu; | ||
346 | |||
347 | rcu = container_of(head, struct rcu_bh_torture_synchronize, head); | ||
348 | complete(&rcu->completion); | ||
349 | } | ||
350 | |||
351 | static void rcu_bh_torture_synchronize(void) | ||
352 | { | ||
353 | struct rcu_bh_torture_synchronize rcu; | ||
354 | |||
355 | init_completion(&rcu.completion); | ||
356 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | ||
357 | wait_for_completion(&rcu.completion); | ||
358 | } | ||
359 | |||
274 | static struct rcu_torture_ops rcu_bh_ops = { | 360 | static struct rcu_torture_ops rcu_bh_ops = { |
275 | .init = NULL, | 361 | .init = NULL, |
276 | .cleanup = NULL, | 362 | .cleanup = NULL, |
277 | .readlock = rcu_bh_torture_read_lock, | 363 | .readlock = rcu_bh_torture_read_lock, |
364 | .readdelay = rcu_read_delay, /* just reuse rcu's version. */ | ||
278 | .readunlock = rcu_bh_torture_read_unlock, | 365 | .readunlock = rcu_bh_torture_read_unlock, |
279 | .completed = rcu_bh_torture_completed, | 366 | .completed = rcu_bh_torture_completed, |
280 | .deferredfree = rcu_bh_torture_deferred_free, | 367 | .deferredfree = rcu_bh_torture_deferred_free, |
368 | .sync = rcu_bh_torture_synchronize, | ||
281 | .stats = NULL, | 369 | .stats = NULL, |
282 | .name = "rcu_bh" | 370 | .name = "rcu_bh" |
283 | }; | 371 | }; |
284 | 372 | ||
373 | static struct rcu_torture_ops rcu_bh_sync_ops = { | ||
374 | .init = rcu_sync_torture_init, | ||
375 | .cleanup = NULL, | ||
376 | .readlock = rcu_bh_torture_read_lock, | ||
377 | .readdelay = rcu_read_delay, /* just reuse rcu's version. */ | ||
378 | .readunlock = rcu_bh_torture_read_unlock, | ||
379 | .completed = rcu_bh_torture_completed, | ||
380 | .deferredfree = rcu_sync_torture_deferred_free, | ||
381 | .sync = rcu_bh_torture_synchronize, | ||
382 | .stats = NULL, | ||
383 | .name = "rcu_bh_sync" | ||
384 | }; | ||
385 | |||
386 | /* | ||
387 | * Definitions for srcu torture testing. | ||
388 | */ | ||
389 | |||
390 | static struct srcu_struct srcu_ctl; | ||
391 | |||
392 | static void srcu_torture_init(void) | ||
393 | { | ||
394 | init_srcu_struct(&srcu_ctl); | ||
395 | rcu_sync_torture_init(); | ||
396 | } | ||
397 | |||
398 | static void srcu_torture_cleanup(void) | ||
399 | { | ||
400 | synchronize_srcu(&srcu_ctl); | ||
401 | cleanup_srcu_struct(&srcu_ctl); | ||
402 | } | ||
403 | |||
404 | static int srcu_torture_read_lock(void) | ||
405 | { | ||
406 | return srcu_read_lock(&srcu_ctl); | ||
407 | } | ||
408 | |||
409 | static void srcu_read_delay(struct rcu_random_state *rrsp) | ||
410 | { | ||
411 | long delay; | ||
412 | const long uspertick = 1000000 / HZ; | ||
413 | const long longdelay = 10; | ||
414 | |||
415 | /* We want there to be long-running readers, but not all the time. */ | ||
416 | |||
417 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); | ||
418 | if (!delay) | ||
419 | schedule_timeout_interruptible(longdelay); | ||
420 | } | ||
421 | |||
422 | static void srcu_torture_read_unlock(int idx) | ||
423 | { | ||
424 | srcu_read_unlock(&srcu_ctl, idx); | ||
425 | } | ||
426 | |||
427 | static int srcu_torture_completed(void) | ||
428 | { | ||
429 | return srcu_batches_completed(&srcu_ctl); | ||
430 | } | ||
431 | |||
432 | static void srcu_torture_synchronize(void) | ||
433 | { | ||
434 | synchronize_srcu(&srcu_ctl); | ||
435 | } | ||
436 | |||
437 | static int srcu_torture_stats(char *page) | ||
438 | { | ||
439 | int cnt = 0; | ||
440 | int cpu; | ||
441 | int idx = srcu_ctl.completed & 0x1; | ||
442 | |||
443 | cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", | ||
444 | torture_type, TORTURE_FLAG, idx); | ||
445 | for_each_possible_cpu(cpu) { | ||
446 | cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, | ||
447 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], | ||
448 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); | ||
449 | } | ||
450 | cnt += sprintf(&page[cnt], "\n"); | ||
451 | return cnt; | ||
452 | } | ||
453 | |||
454 | static struct rcu_torture_ops srcu_ops = { | ||
455 | .init = srcu_torture_init, | ||
456 | .cleanup = srcu_torture_cleanup, | ||
457 | .readlock = srcu_torture_read_lock, | ||
458 | .readdelay = srcu_read_delay, | ||
459 | .readunlock = srcu_torture_read_unlock, | ||
460 | .completed = srcu_torture_completed, | ||
461 | .deferredfree = rcu_sync_torture_deferred_free, | ||
462 | .sync = srcu_torture_synchronize, | ||
463 | .stats = srcu_torture_stats, | ||
464 | .name = "srcu" | ||
465 | }; | ||
466 | |||
467 | /* | ||
468 | * Definitions for sched torture testing. | ||
469 | */ | ||
470 | |||
471 | static int sched_torture_read_lock(void) | ||
472 | { | ||
473 | preempt_disable(); | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | static void sched_torture_read_unlock(int idx) | ||
478 | { | ||
479 | preempt_enable(); | ||
480 | } | ||
481 | |||
482 | static int sched_torture_completed(void) | ||
483 | { | ||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | static void sched_torture_synchronize(void) | ||
488 | { | ||
489 | synchronize_sched(); | ||
490 | } | ||
491 | |||
492 | static struct rcu_torture_ops sched_ops = { | ||
493 | .init = rcu_sync_torture_init, | ||
494 | .cleanup = NULL, | ||
495 | .readlock = sched_torture_read_lock, | ||
496 | .readdelay = rcu_read_delay, /* just reuse rcu's version. */ | ||
497 | .readunlock = sched_torture_read_unlock, | ||
498 | .completed = sched_torture_completed, | ||
499 | .deferredfree = rcu_sync_torture_deferred_free, | ||
500 | .sync = sched_torture_synchronize, | ||
501 | .stats = NULL, | ||
502 | .name = "sched" | ||
503 | }; | ||
504 | |||
285 | static struct rcu_torture_ops *torture_ops[] = | 505 | static struct rcu_torture_ops *torture_ops[] = |
286 | { &rcu_ops, &rcu_bh_ops, NULL }; | 506 | { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops, |
507 | &sched_ops, NULL }; | ||
287 | 508 | ||
288 | /* | 509 | /* |
289 | * RCU torture writer kthread. Repeatedly substitutes a new structure | 510 | * RCU torture writer kthread. Repeatedly substitutes a new structure |
@@ -330,6 +551,30 @@ rcu_torture_writer(void *arg) | |||
330 | } | 551 | } |
331 | 552 | ||
332 | /* | 553 | /* |
554 | * RCU torture fake writer kthread. Repeatedly calls sync, with a random | ||
555 | * delay between calls. | ||
556 | */ | ||
557 | static int | ||
558 | rcu_torture_fakewriter(void *arg) | ||
559 | { | ||
560 | DEFINE_RCU_RANDOM(rand); | ||
561 | |||
562 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); | ||
563 | set_user_nice(current, 19); | ||
564 | |||
565 | do { | ||
566 | schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); | ||
567 | udelay(rcu_random(&rand) & 0x3ff); | ||
568 | cur_ops->sync(); | ||
569 | } while (!kthread_should_stop() && !fullstop); | ||
570 | |||
571 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); | ||
572 | while (!kthread_should_stop()) | ||
573 | schedule_timeout_uninterruptible(1); | ||
574 | return 0; | ||
575 | } | ||
576 | |||
577 | /* | ||
333 | * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, | 578 | * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, |
334 | * incrementing the corresponding element of the pipeline array. The | 579 | * incrementing the corresponding element of the pipeline array. The |
335 | * counter in the element should never be greater than 1, otherwise, the | 580 | * counter in the element should never be greater than 1, otherwise, the |
@@ -359,7 +604,7 @@ rcu_torture_reader(void *arg) | |||
359 | } | 604 | } |
360 | if (p->rtort_mbtest == 0) | 605 | if (p->rtort_mbtest == 0) |
361 | atomic_inc(&n_rcu_torture_mberror); | 606 | atomic_inc(&n_rcu_torture_mberror); |
362 | udelay(rcu_random(&rand) & 0x7f); | 607 | cur_ops->readdelay(&rand); |
363 | preempt_disable(); | 608 | preempt_disable(); |
364 | pipe_count = p->rtort_pipe_count; | 609 | pipe_count = p->rtort_pipe_count; |
365 | if (pipe_count > RCU_TORTURE_PIPE_LEN) { | 610 | if (pipe_count > RCU_TORTURE_PIPE_LEN) { |
@@ -483,7 +728,7 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ | |||
483 | /* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case | 728 | /* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case |
484 | * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. | 729 | * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. |
485 | */ | 730 | */ |
486 | void rcu_torture_shuffle_tasks(void) | 731 | static void rcu_torture_shuffle_tasks(void) |
487 | { | 732 | { |
488 | cpumask_t tmp_mask = CPU_MASK_ALL; | 733 | cpumask_t tmp_mask = CPU_MASK_ALL; |
489 | int i; | 734 | int i; |
@@ -507,6 +752,12 @@ void rcu_torture_shuffle_tasks(void) | |||
507 | set_cpus_allowed(reader_tasks[i], tmp_mask); | 752 | set_cpus_allowed(reader_tasks[i], tmp_mask); |
508 | } | 753 | } |
509 | 754 | ||
755 | if (fakewriter_tasks != NULL) { | ||
756 | for (i = 0; i < nfakewriters; i++) | ||
757 | if (fakewriter_tasks[i]) | ||
758 | set_cpus_allowed(fakewriter_tasks[i], tmp_mask); | ||
759 | } | ||
760 | |||
510 | if (writer_task) | 761 | if (writer_task) |
511 | set_cpus_allowed(writer_task, tmp_mask); | 762 | set_cpus_allowed(writer_task, tmp_mask); |
512 | 763 | ||
@@ -540,11 +791,12 @@ rcu_torture_shuffle(void *arg) | |||
540 | static inline void | 791 | static inline void |
541 | rcu_torture_print_module_parms(char *tag) | 792 | rcu_torture_print_module_parms(char *tag) |
542 | { | 793 | { |
543 | printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d " | 794 | printk(KERN_ALERT "%s" TORTURE_FLAG |
795 | "--- %s: nreaders=%d nfakewriters=%d " | ||
544 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " | 796 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " |
545 | "shuffle_interval = %d\n", | 797 | "shuffle_interval = %d\n", |
546 | torture_type, tag, nrealreaders, stat_interval, verbose, | 798 | torture_type, tag, nrealreaders, nfakewriters, |
547 | test_no_idle_hz, shuffle_interval); | 799 | stat_interval, verbose, test_no_idle_hz, shuffle_interval); |
548 | } | 800 | } |
549 | 801 | ||
550 | static void | 802 | static void |
@@ -579,6 +831,19 @@ rcu_torture_cleanup(void) | |||
579 | } | 831 | } |
580 | rcu_torture_current = NULL; | 832 | rcu_torture_current = NULL; |
581 | 833 | ||
834 | if (fakewriter_tasks != NULL) { | ||
835 | for (i = 0; i < nfakewriters; i++) { | ||
836 | if (fakewriter_tasks[i] != NULL) { | ||
837 | VERBOSE_PRINTK_STRING( | ||
838 | "Stopping rcu_torture_fakewriter task"); | ||
839 | kthread_stop(fakewriter_tasks[i]); | ||
840 | } | ||
841 | fakewriter_tasks[i] = NULL; | ||
842 | } | ||
843 | kfree(fakewriter_tasks); | ||
844 | fakewriter_tasks = NULL; | ||
845 | } | ||
846 | |||
582 | if (stats_task != NULL) { | 847 | if (stats_task != NULL) { |
583 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); | 848 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); |
584 | kthread_stop(stats_task); | 849 | kthread_stop(stats_task); |
@@ -666,7 +931,25 @@ rcu_torture_init(void) | |||
666 | writer_task = NULL; | 931 | writer_task = NULL; |
667 | goto unwind; | 932 | goto unwind; |
668 | } | 933 | } |
669 | reader_tasks = kmalloc(nrealreaders * sizeof(reader_tasks[0]), | 934 | fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), |
935 | GFP_KERNEL); | ||
936 | if (fakewriter_tasks == NULL) { | ||
937 | VERBOSE_PRINTK_ERRSTRING("out of memory"); | ||
938 | firsterr = -ENOMEM; | ||
939 | goto unwind; | ||
940 | } | ||
941 | for (i = 0; i < nfakewriters; i++) { | ||
942 | VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); | ||
943 | fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, | ||
944 | "rcu_torture_fakewriter"); | ||
945 | if (IS_ERR(fakewriter_tasks[i])) { | ||
946 | firsterr = PTR_ERR(fakewriter_tasks[i]); | ||
947 | VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); | ||
948 | fakewriter_tasks[i] = NULL; | ||
949 | goto unwind; | ||
950 | } | ||
951 | } | ||
952 | reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), | ||
670 | GFP_KERNEL); | 953 | GFP_KERNEL); |
671 | if (reader_tasks == NULL) { | 954 | if (reader_tasks == NULL) { |
672 | VERBOSE_PRINTK_ERRSTRING("out of memory"); | 955 | VERBOSE_PRINTK_ERRSTRING("out of memory"); |
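The reader, writer and new fake-writer kthreads only ever go through the cur_ops indirection, so adding another torture type amounts to filling in one rcu_torture_ops and listing it in torture_ops[]; a run is then selected with the torture_type module parameter (for example torture_type=srcu, with nfakewriters sizing the new sync-calling threads). A sketch of such an entry is below; the foo_* names are purely hypothetical placeholders for a new implementation's hooks.

/* Sketch only: foo_* functions are placeholders, not part of this patch. */
static struct rcu_torture_ops foo_ops = {
	.init		= rcu_sync_torture_init,
	.cleanup	= NULL,
	.readlock	= foo_torture_read_lock,
	.readdelay	= rcu_read_delay,	/* just reuse rcu's version */
	.readunlock	= foo_torture_read_unlock,
	.completed	= foo_torture_completed,
	.deferredfree	= rcu_sync_torture_deferred_free,
	.sync		= foo_torture_synchronize,
	.stats		= NULL,
	.name		= "foo"
};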
diff --git a/kernel/srcu.c b/kernel/srcu.c new file mode 100644 index 000000000000..3507cabe963b --- /dev/null +++ b/kernel/srcu.c | |||
@@ -0,0 +1,258 @@ | |||
1 | /* | ||
2 | * Sleepable Read-Copy Update mechanism for mutual exclusion. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2006 | ||
19 | * | ||
20 | * Author: Paul McKenney <paulmck@us.ibm.com> | ||
21 | * | ||
22 | * For detailed explanation of Read-Copy Update mechanism see - | ||
23 | * Documentation/RCU/ *.txt | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/preempt.h> | ||
31 | #include <linux/rcupdate.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/srcu.h> | ||
36 | |||
37 | /** | ||
38 | * init_srcu_struct - initialize a sleep-RCU structure | ||
39 | * @sp: structure to initialize. | ||
40 | * | ||
41 | * Must invoke this on a given srcu_struct before passing that srcu_struct | ||
42 | * to any other function. Each srcu_struct represents a separate domain | ||
43 | * of SRCU protection. | ||
44 | */ | ||
45 | int init_srcu_struct(struct srcu_struct *sp) | ||
46 | { | ||
47 | sp->completed = 0; | ||
48 | mutex_init(&sp->mutex); | ||
49 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); | ||
50 | return (sp->per_cpu_ref ? 0 : -ENOMEM); | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * srcu_readers_active_idx -- returns approximate number of readers | ||
55 | * active on the specified rank of per-CPU counters. | ||
56 | */ | ||
57 | |||
58 | static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | ||
59 | { | ||
60 | int cpu; | ||
61 | int sum; | ||
62 | |||
63 | sum = 0; | ||
64 | for_each_possible_cpu(cpu) | ||
65 | sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; | ||
66 | return sum; | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * srcu_readers_active - returns approximate number of readers. | ||
71 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | ||
72 | * | ||
73 | * Note that this is not an atomic primitive, and can therefore suffer | ||
74 | * severe errors when invoked on an active srcu_struct. That said, it | ||
75 | * can be useful as an error check at cleanup time. | ||
76 | */ | ||
77 | int srcu_readers_active(struct srcu_struct *sp) | ||
78 | { | ||
79 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); | ||
80 | } | ||
81 | |||
82 | /** | ||
83 | * cleanup_srcu_struct - deconstruct a sleep-RCU structure | ||
84 | * @sp: structure to clean up. | ||
85 | * | ||
86 | * Must invoke this after you are finished using a given srcu_struct that | ||
87 | * was initialized via init_srcu_struct(), else you leak memory. | ||
88 | */ | ||
89 | void cleanup_srcu_struct(struct srcu_struct *sp) | ||
90 | { | ||
91 | int sum; | ||
92 | |||
93 | sum = srcu_readers_active(sp); | ||
94 | WARN_ON(sum); /* Leakage unless caller handles error. */ | ||
95 | if (sum != 0) | ||
96 | return; | ||
97 | free_percpu(sp->per_cpu_ref); | ||
98 | sp->per_cpu_ref = NULL; | ||
99 | } | ||
100 | |||
101 | /** | ||
102 | * srcu_read_lock - register a new reader for an SRCU-protected structure. | ||
103 | * @sp: srcu_struct in which to register the new reader. | ||
104 | * | ||
105 | * Counts the new reader in the appropriate per-CPU element of the | ||
106 | * srcu_struct. Must be called from process context. | ||
107 | * Returns an index that must be passed to the matching srcu_read_unlock(). | ||
108 | */ | ||
109 | int srcu_read_lock(struct srcu_struct *sp) | ||
110 | { | ||
111 | int idx; | ||
112 | |||
113 | preempt_disable(); | ||
114 | idx = sp->completed & 0x1; | ||
115 | barrier(); /* ensure compiler looks -once- at sp->completed. */ | ||
116 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; | ||
117 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | ||
118 | preempt_enable(); | ||
119 | return idx; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. | ||
124 | * @sp: srcu_struct in which to unregister the old reader. | ||
125 | * @idx: return value from corresponding srcu_read_lock(). | ||
126 | * | ||
127 | * Removes the count for the old reader from the appropriate per-CPU | ||
128 | * element of the srcu_struct. Note that this may well be a different | ||
129 | * CPU than that which was incremented by the corresponding srcu_read_lock(). | ||
130 | * Must be called from process context. | ||
131 | */ | ||
132 | void srcu_read_unlock(struct srcu_struct *sp, int idx) | ||
133 | { | ||
134 | preempt_disable(); | ||
135 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | ||
136 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; | ||
137 | preempt_enable(); | ||
138 | } | ||
139 | |||
140 | /** | ||
141 | * synchronize_srcu - wait for prior SRCU read-side critical-section completion | ||
142 | * @sp: srcu_struct with which to synchronize. | ||
143 | * | ||
144 | * Flip the completed counter, and wait for the old count to drain to zero. | ||
145 | * As with classic RCU, the updater must use some separate means of | ||
146 | * synchronizing concurrent updates. Can block; must be called from | ||
147 | * process context. | ||
148 | * | ||
149 | * Note that it is illegal to call synchornize_srcu() from the corresponding | ||
150 | * SRCU read-side critical section; doing so will result in deadlock. | ||
151 | * However, it is perfectly legal to call synchronize_srcu() on one | ||
152 | * srcu_struct from some other srcu_struct's read-side critical section. | ||
153 | */ | ||
154 | void synchronize_srcu(struct srcu_struct *sp) | ||
155 | { | ||
156 | int idx; | ||
157 | |||
158 | idx = sp->completed; | ||
159 | mutex_lock(&sp->mutex); | ||
160 | |||
161 | /* | ||
162 | * Check to see if someone else did the work for us while we were | ||
163 | * waiting to acquire the lock. We need -two- advances of | ||
164 | * the counter, not just one. If there was but one, we might have | ||
165 | * shown up -after- our helper's first synchronize_sched(), thus | ||
166 | * having failed to prevent CPU-reordering races with concurrent | ||
167 | * srcu_read_unlock()s on other CPUs (see comment below). So we | ||
168 | * either (1) wait for two or (2) supply the second ourselves. | ||
169 | */ | ||
170 | |||
171 | if ((sp->completed - idx) >= 2) { | ||
172 | mutex_unlock(&sp->mutex); | ||
173 | return; | ||
174 | } | ||
175 | |||
176 | synchronize_sched(); /* Force memory barrier on all CPUs. */ | ||
177 | |||
178 | /* | ||
179 | * The preceding synchronize_sched() ensures that any CPU that | ||
180 | * sees the new value of sp->completed will also see any preceding | ||
181 | * changes to data structures made by this CPU. This prevents | ||
182 | * some other CPU from reordering the accesses in its SRCU | ||
183 | * read-side critical section to precede the corresponding | ||
184 | * srcu_read_lock() -- ensuring that such references will in | ||
185 | * fact be protected. | ||
186 | * | ||
187 | * So it is now safe to do the flip. | ||
188 | */ | ||
189 | |||
190 | idx = sp->completed & 0x1; | ||
191 | sp->completed++; | ||
192 | |||
193 | synchronize_sched(); /* Force memory barrier on all CPUs. */ | ||
194 | |||
195 | /* | ||
196 | * At this point, because of the preceding synchronize_sched(), | ||
197 | * all srcu_read_lock() calls using the old counters have completed. | ||
198 | * Their corresponding critical sections might well be still | ||
199 | * executing, but the srcu_read_lock() primitives themselves | ||
200 | * will have finished executing. | ||
201 | */ | ||
202 | |||
203 | while (srcu_readers_active_idx(sp, idx)) | ||
204 | schedule_timeout_interruptible(1); | ||
205 | |||
206 | synchronize_sched(); /* Force memory barrier on all CPUs. */ | ||
207 | |||
208 | /* | ||
209 | * The preceding synchronize_sched() forces all srcu_read_unlock() | ||
210 | * primitives that were executing concurrently with the preceding | ||
211 | * for_each_possible_cpu() loop to have completed by this point. | ||
212 | * More importantly, it also forces the corresponding SRCU read-side | ||
213 | * critical sections to have also completed, and the corresponding | ||
214 | * references to SRCU-protected data items to be dropped. | ||
215 | * | ||
216 | * Note: | ||
217 | * | ||
218 | * Despite what you might think at first glance, the | ||
219 | * preceding synchronize_sched() -must- be within the | ||
220 | * critical section ended by the following mutex_unlock(). | ||
221 | * Otherwise, a task taking the early exit can race | ||
222 | * with a srcu_read_unlock(), which might have executed | ||
223 | * just before the preceding srcu_readers_active() check, | ||
224 | * and whose CPU might have reordered the srcu_read_unlock() | ||
225 | * with the preceding critical section. In this case, there | ||
226 | * is nothing preventing the synchronize_sched() task that is | ||
227 | * taking the early exit from freeing a data structure that | ||
228 | * is still being referenced (out of order) by the task | ||
229 | * doing the srcu_read_unlock(). | ||
230 | * | ||
231 | * Alternatively, the comparison with "2" on the early exit | ||
232 | * could be changed to "3", but this increases synchronize_srcu() | ||
233 | * latency for bulk loads. So the current code is preferred. | ||
234 | */ | ||
235 | |||
236 | mutex_unlock(&sp->mutex); | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * srcu_batches_completed - return batches completed. | ||
241 | * @sp: srcu_struct on which to report batch completion. | ||
242 | * | ||
243 | * Report the number of batches, correlated with, but not necessarily | ||
244 | * precisely the same as, the number of grace periods that have elapsed. | ||
245 | */ | ||
246 | |||
247 | long srcu_batches_completed(struct srcu_struct *sp) | ||
248 | { | ||
249 | return sp->completed; | ||
250 | } | ||
251 | |||
252 | EXPORT_SYMBOL_GPL(init_srcu_struct); | ||
253 | EXPORT_SYMBOL_GPL(cleanup_srcu_struct); | ||
254 | EXPORT_SYMBOL_GPL(srcu_read_lock); | ||
255 | EXPORT_SYMBOL_GPL(srcu_read_unlock); | ||
256 | EXPORT_SYMBOL_GPL(synchronize_srcu); | ||
257 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | ||
258 | EXPORT_SYMBOL_GPL(srcu_readers_active); | ||
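Taken together, the new API is used much like classic RCU, except that each user supplies its own srcu_struct, readers may block, and the index returned by srcu_read_lock() must be handed back to the matching srcu_read_unlock(). A minimal sketch follows; the example_* names are illustrative only, and serializing updaters (for example with a mutex) is the caller's responsibility, as the synchronize_srcu() comment above notes.

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_data {
	int value;
};

static struct srcu_struct example_srcu;	/* init_srcu_struct() before use,
					   cleanup_srcu_struct() at teardown */
static struct example_data *example_ptr;

static int example_read(void)
{
	int idx, val;

	idx = srcu_read_lock(&example_srcu);	/* critical section may sleep */
	val = rcu_dereference(example_ptr)->value;
	srcu_read_unlock(&example_srcu, idx);
	return val;
}

/* Updaters must be serialized by the caller (e.g. a mutex). */
static void example_update(struct example_data *newp)
{
	struct example_data *oldp = example_ptr;

	rcu_assign_pointer(example_ptr, newp);
	synchronize_srcu(&example_srcu);	/* wait for pre-existing readers */
	kfree(oldp);
}

As with classic RCU, the one hard rule is that synchronize_srcu() must not be called from within a read-side critical section of the same srcu_struct, or the caller deadlocks.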
diff --git a/kernel/sys.c b/kernel/sys.c index 2314867ae34f..98489d82801b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -153,7 +153,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, | |||
153 | 153 | ||
154 | /* | 154 | /* |
155 | * Atomic notifier chain routines. Registration and unregistration | 155 | * Atomic notifier chain routines. Registration and unregistration |
156 | * use a mutex, and call_chain is synchronized by RCU (no locks). | 156 | * use a spinlock, and call_chain is synchronized by RCU (no locks). |
157 | */ | 157 | */ |
158 | 158 | ||
159 | /** | 159 | /** |
@@ -401,6 +401,129 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh, | |||
401 | 401 | ||
402 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); | 402 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); |
403 | 403 | ||
404 | /* | ||
405 | * SRCU notifier chain routines. Registration and unregistration | ||
406 | * use a mutex, and call_chain is synchronized by SRCU (no locks). | ||
407 | */ | ||
408 | |||
409 | /** | ||
410 | * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain | ||
411 | * @nh: Pointer to head of the SRCU notifier chain | ||
412 | * @n: New entry in notifier chain | ||
413 | * | ||
414 | * Adds a notifier to an SRCU notifier chain. | ||
415 | * Must be called in process context. | ||
416 | * | ||
417 | * Currently always returns zero. | ||
418 | */ | ||
419 | |||
420 | int srcu_notifier_chain_register(struct srcu_notifier_head *nh, | ||
421 | struct notifier_block *n) | ||
422 | { | ||
423 | int ret; | ||
424 | |||
425 | /* | ||
426 | * This code gets used during boot-up, when task switching is | ||
427 | * not yet working and interrupts must remain disabled. At | ||
428 | * such times we must not call mutex_lock(). | ||
429 | */ | ||
430 | if (unlikely(system_state == SYSTEM_BOOTING)) | ||
431 | return notifier_chain_register(&nh->head, n); | ||
432 | |||
433 | mutex_lock(&nh->mutex); | ||
434 | ret = notifier_chain_register(&nh->head, n); | ||
435 | mutex_unlock(&nh->mutex); | ||
436 | return ret; | ||
437 | } | ||
438 | |||
439 | EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); | ||
440 | |||
441 | /** | ||
442 | * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain | ||
443 | * @nh: Pointer to head of the SRCU notifier chain | ||
444 | * @n: Entry to remove from notifier chain | ||
445 | * | ||
446 | * Removes a notifier from an SRCU notifier chain. | ||
447 | * Must be called from process context. | ||
448 | * | ||
449 | * Returns zero on success or %-ENOENT on failure. | ||
450 | */ | ||
451 | int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, | ||
452 | struct notifier_block *n) | ||
453 | { | ||
454 | int ret; | ||
455 | |||
456 | /* | ||
457 | * This code gets used during boot-up, when task switching is | ||
458 | * not yet working and interrupts must remain disabled. At | ||
459 | * such times we must not call mutex_lock(). | ||
460 | */ | ||
461 | if (unlikely(system_state == SYSTEM_BOOTING)) | ||
462 | return notifier_chain_unregister(&nh->head, n); | ||
463 | |||
464 | mutex_lock(&nh->mutex); | ||
465 | ret = notifier_chain_unregister(&nh->head, n); | ||
466 | mutex_unlock(&nh->mutex); | ||
467 | synchronize_srcu(&nh->srcu); | ||
468 | return ret; | ||
469 | } | ||
470 | |||
471 | EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); | ||
472 | |||
473 | /** | ||
474 | * srcu_notifier_call_chain - Call functions in an SRCU notifier chain | ||
475 | * @nh: Pointer to head of the SRCU notifier chain | ||
476 | * @val: Value passed unmodified to notifier function | ||
477 | * @v: Pointer passed unmodified to notifier function | ||
478 | * | ||
479 | * Calls each function in a notifier chain in turn. The functions | ||
480 | * run in a process context, so they are allowed to block. | ||
481 | * | ||
482 | * If the return value of the notifier can be and'ed | ||
483 | * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain | ||
484 | * will return immediately, with the return value of | ||
485 | * the notifier function which halted execution. | ||
486 | * Otherwise the return value is the return value | ||
487 | * of the last notifier function called. | ||
488 | */ | ||
489 | |||
490 | int srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||
491 | unsigned long val, void *v) | ||
492 | { | ||
493 | int ret; | ||
494 | int idx; | ||
495 | |||
496 | idx = srcu_read_lock(&nh->srcu); | ||
497 | ret = notifier_call_chain(&nh->head, val, v); | ||
498 | srcu_read_unlock(&nh->srcu, idx); | ||
499 | return ret; | ||
500 | } | ||
501 | |||
502 | EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); | ||
503 | |||
504 | /** | ||
505 | * srcu_init_notifier_head - Initialize an SRCU notifier head | ||
506 | * @nh: Pointer to head of the srcu notifier chain | ||
507 | * | ||
508 | * Unlike other sorts of notifier heads, SRCU notifier heads require | ||
509 | * dynamic initialization. Be sure to call this routine before | ||
510 | * calling any of the other SRCU notifier routines for this head. | ||
511 | * | ||
512 | * If an SRCU notifier head is deallocated, it must first be cleaned | ||
513 | * up by calling srcu_cleanup_notifier_head(). Otherwise the head's | ||
514 | * per-cpu data (used by the SRCU mechanism) will leak. | ||
515 | */ | ||
516 | |||
517 | void srcu_init_notifier_head(struct srcu_notifier_head *nh) | ||
518 | { | ||
519 | mutex_init(&nh->mutex); | ||
520 | if (init_srcu_struct(&nh->srcu) < 0) | ||
521 | BUG(); | ||
522 | nh->head = NULL; | ||
523 | } | ||
524 | |||
525 | EXPORT_SYMBOL_GPL(srcu_init_notifier_head); | ||
526 | |||
404 | /** | 527 | /** |
405 | * register_reboot_notifier - Register function to be called at reboot time | 528 | * register_reboot_notifier - Register function to be called at reboot time |
406 | * @nb: Info about notifier function to be called | 529 | * @nb: Info about notifier function to be called |
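Usage of the new chain flavour mirrors the existing notifier chains, with two differences: the head requires the dynamic srcu_init_notifier_head() call, and callbacks invoked via srcu_notifier_call_chain() run in process context and may block. A minimal sketch is below; the example_* names are illustrative, and the srcu_notifier_head type and these prototypes come from the companion include/linux/notifier.h change, which this kernel/-only diffstat does not show.

#include <linux/notifier.h>
#include <linux/init.h>

static struct srcu_notifier_head example_chain;

static int example_callback(struct notifier_block *nb,
			    unsigned long event, void *data)
{
	/* Runs under srcu_read_lock(), not a spinlock, so it may block. */
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call	= example_callback,
};

static int __init example_init(void)
{
	srcu_init_notifier_head(&example_chain);	/* mandatory dynamic init */
	return srcu_notifier_chain_register(&example_chain, &example_nb);
}

/* From process context, whenever the event of interest happens: */
static void example_notify(void *data)
{
	srcu_notifier_call_chain(&example_chain, 0, data);
}

If the head is ever deallocated, the kernel-doc above says it must first be cleaned up (srcu_cleanup_notifier_head() in the companion header change), otherwise the SRCU per-CPU state leaks.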