author     Ingo Molnar <mingo@elte.hu>   2010-08-23 05:32:34 -0400
committer  Ingo Molnar <mingo@elte.hu>   2010-08-23 05:32:34 -0400
commit     a6b9b4d50f492630443b38404d1f436b3b748c14 (patch)
tree       f3512389c42cecfae50b6a315ec6ab1fa470e30d
parent     e36c886a0f9d624377977fa6cae309cfd7f362fa (diff)
parent     28457ee69c473a903e51e26c7bcd6f1e9eceb93e (diff)
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu
-rw-r--r--  Documentation/DocBook/kernel-locking.tmpl   14
-rw-r--r--  Documentation/RCU/checklist.txt             46
-rw-r--r--  drivers/input/evdev.c                        2
-rw-r--r--  drivers/vhost/net.c                         16
-rw-r--r--  drivers/vhost/vhost.c                       22
-rw-r--r--  drivers/vhost/vhost.h                       10
-rw-r--r--  include/linux/cgroup.h                       4
-rw-r--r--  include/linux/compiler.h                     4
-rw-r--r--  include/linux/cred.h                         2
-rw-r--r--  include/linux/fdtable.h                      6
-rw-r--r--  include/linux/fs.h                           2
-rw-r--r--  include/linux/genhd.h                        6
-rw-r--r--  include/linux/hardirq.h                      2
-rw-r--r--  include/linux/idr.h                          4
-rw-r--r--  include/linux/init_task.h                   14
-rw-r--r--  include/linux/input.h                        2
-rw-r--r--  include/linux/iocontext.h                    2
-rw-r--r--  include/linux/key.h                          3
-rw-r--r--  include/linux/kvm_host.h                     2
-rw-r--r--  include/linux/mm_types.h                     2
-rw-r--r--  include/linux/nfs_fs.h                       2
-rw-r--r--  include/linux/notifier.h                    10
-rw-r--r--  include/linux/radix-tree.h                   4
-rw-r--r--  include/linux/rculist.h                     62
-rw-r--r--  include/linux/rculist_nulls.h               16
-rw-r--r--  include/linux/rcupdate.h                   482
-rw-r--r--  include/linux/rcutiny.h                    104
-rw-r--r--  include/linux/rcutree.h                     57
-rw-r--r--  include/linux/sched.h                       16
-rw-r--r--  include/linux/srcu.h                        34
-rw-r--r--  include/linux/sunrpc/auth_gss.h              4
-rw-r--r--  include/net/cls_cgroup.h                     3
-rw-r--r--  include/net/netfilter/nf_conntrack.h         2
-rw-r--r--  init/Kconfig                                26
-rw-r--r--  kernel/Makefile                              1
-rw-r--r--  kernel/cgroup.c                              2
-rw-r--r--  kernel/pid.c                                 3
-rw-r--r--  kernel/rcupdate.c                            6
-rw-r--r--  kernel/rcutiny.c                            33
-rw-r--r--  kernel/rcutiny_plugin.h                    578
-rw-r--r--  kernel/rcutorture.c                          6
-rw-r--r--  kernel/rcutree.c                            85
-rw-r--r--  kernel/rcutree.h                            17
-rw-r--r--  kernel/rcutree_plugin.h                     45
-rw-r--r--  kernel/rcutree_trace.c                       2
-rw-r--r--  lib/Kconfig.debug                           37
-rw-r--r--  lib/radix-tree.c                             2
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c             2
-rw-r--r--  net/netfilter/core.c                         2
-rw-r--r--  net/netfilter/nf_conntrack_ecache.c          4
-rw-r--r--  net/netfilter/nf_conntrack_extend.c          2
-rw-r--r--  net/netfilter/nf_conntrack_proto.c           4
-rw-r--r--  net/netfilter/nf_log.c                       2
-rw-r--r--  net/netfilter/nf_queue.c                     2
54 files changed, 1364 insertions, 458 deletions
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 0b1a3f97f28..d7884b13fb1 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1645,7 +1645,9 @@ the amount of locking which needs to be done.
 all the readers who were traversing the list when we deleted the
 element are finished. We use <function>call_rcu()</function> to
 register a callback which will actually destroy the object once
-the readers are finished.
+all pre-existing readers are finished. Alternatively,
+<function>synchronize_rcu()</function> may be used to block until
+all pre-existing readers are finished.
 </para>
 <para>
 But how does Read Copy Update know when the readers are
@@ -1714,7 +1716,7 @@ the amount of locking which needs to be done.
 - object_put(obj);
 + list_del_rcu(&amp;obj-&gt;list);
 cache_num--;
-+ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu, obj);
++ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu);
 }
 
 /* Must be holding cache_lock */
@@ -1725,14 +1727,6 @@ the amount of locking which needs to be done.
 if (++cache_num > MAX_CACHE_SIZE) {
 struct object *i, *outcast = NULL;
 list_for_each_entry(i, &amp;cache, list) {
-@@ -85,6 +94,7 @@
-obj-&gt;popularity = 0;
-atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
-spin_lock_init(&amp;obj-&gt;lock);
-+ INIT_RCU_HEAD(&amp;obj-&gt;rcu);
-
-spin_lock_irqsave(&amp;cache_lock, flags);
-__cache_add(obj);
 @@ -104,12 +114,11 @@
 struct object *cache_find(int id)
 {
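
The two deferral styles described in the documentation hunk above can be captured in a short, self-contained sketch; the struct obj, cache list, and lock names below are illustrative and are not taken from the patched example:

    #include <linux/list.h>
    #include <linux/rculist.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct obj {
            struct list_head list;
            struct rcu_head rcu;
    };

    static LIST_HEAD(cache);
    static DEFINE_SPINLOCK(cache_lock);

    static void obj_free_rcu(struct rcu_head *head)
    {
            /* Runs only after all pre-existing readers have finished. */
            kfree(container_of(head, struct obj, rcu));
    }

    /* Asynchronous style: usable from atomic context. */
    static void obj_remove_async(struct obj *obj)
    {
            spin_lock(&cache_lock);
            list_del_rcu(&obj->list);
            spin_unlock(&cache_lock);
            call_rcu(&obj->rcu, obj_free_rcu);
    }

    /* Blocking style: the caller must be able to sleep. */
    static void obj_remove_sync(struct obj *obj)
    {
            spin_lock(&cache_lock);
            list_del_rcu(&obj->list);
            spin_unlock(&cache_lock);
            synchronize_rcu();
            kfree(obj);
    }
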
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 790d1a81237..0c134f8afc6 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome!
 include:
 
 a. Keeping a count of the number of data-structure elements
-used by the RCU-protected data structure, including those
-waiting for a grace period to elapse. Enforce a limit
-on this number, stalling updates as needed to allow
-previously deferred frees to complete.
-
-Alternatively, limit only the number awaiting deferred
-free rather than the total number of elements.
+used by the RCU-protected data structure, including
+those waiting for a grace period to elapse. Enforce a
+limit on this number, stalling updates as needed to allow
+previously deferred frees to complete. Alternatively,
+limit only the number awaiting deferred free rather than
+the total number of elements.
+
+One way to stall the updates is to acquire the update-side
+mutex. (Don't try this with a spinlock -- other CPUs
+spinning on the lock could prevent the grace period
+from ever ending.) Another way to stall the updates
+is for the updates to use a wrapper function around
+the memory allocator, so that this wrapper function
+simulates OOM when there is too much memory awaiting an
+RCU grace period. There are of course many other
+variations on this theme.
 
 b. Limiting update rate. For example, if updates occur only
 once per hour, then no explicit rate limiting is required,
@@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome!
 and the compiler to freely reorder code into and out of RCU
 read-side critical sections. It is the responsibility of the
 RCU update-side primitives to deal with this.
+
+17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and
+the __rcu sparse checks to validate your RCU code. These
+can help find problems as follows:
+
+CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+structures are carried out under the proper RCU
+read-side critical section, while holding the right
+combination of locks, or whatever other conditions
+are appropriate.
+
+CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
+same object to call_rcu() (or friends) before an RCU
+grace period has elapsed since the last time that you
+passed that same object to call_rcu() (or friends).
+
+__rcu sparse checks: tag the pointer to the RCU-protected data
+structure with __rcu, and sparse will warn you if you
+access that pointer without the services of one of the
+variants of rcu_dereference().
+
+These debugging aids can help you find problems that are
+otherwise extremely difficult to spot.
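
The stalling strategies added to item (a) above can be pictured as a small wrapper layer around kmalloc()/call_rcu() that tracks how many elements still await a grace period and pushes back on updaters once a limit is reached. The names and the limit below are illustrative, not part of the checklist:

    #include <linux/atomic.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    #define MAX_PENDING_FREES 1000          /* arbitrary illustrative limit */

    static atomic_t pending_frees;

    struct elem {
            struct rcu_head rcu;
            /* ... payload ... */
    };

    static void elem_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct elem, rcu));
            atomic_dec(&pending_frees);
    }

    /* Allocator wrapper: simulate OOM while too much memory awaits a grace period. */
    static struct elem *elem_alloc(gfp_t gfp)
    {
            if (atomic_read(&pending_frees) > MAX_PENDING_FREES)
                    return NULL;
            return kmalloc(sizeof(struct elem), gfp);
    }

    /* Deferred-free wrapper: count the element until its callback runs. */
    static void elem_defer_free(struct elem *e)
    {
            atomic_inc(&pending_frees);
            call_rcu(&e->rcu, elem_free_rcu);
    }
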
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c908c5f8364..5808731f72d 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -28,7 +28,7 @@ struct evdev {
 int minor;
 struct input_handle handle;
 wait_queue_head_t wait;
-struct evdev_client *grab;
+struct evdev_client __rcu *grab;
 struct list_head client_list;
 spinlock_t client_lock; /* protects client_list */
 struct mutex mutex;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 29e850a7a2f..1318ee00834 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net)
 size_t len, total_len = 0;
 int err, wmem;
 size_t hdr_size;
-struct socket *sock = rcu_dereference(vq->private_data);
+struct socket *sock;
+
+sock = rcu_dereference_check(vq->private_data,
+lockdep_is_held(&vq->mutex));
 if (!sock)
 return;
 
@@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n,
 static void vhost_net_enable_vq(struct vhost_net *n,
 struct vhost_virtqueue *vq)
 {
-struct socket *sock = vq->private_data;
+struct socket *sock;
+
+sock = rcu_dereference_protected(vq->private_data,
+lockdep_is_held(&vq->mutex));
 if (!sock)
 return;
 if (vq == n->vqs + VHOST_NET_VQ_TX) {
@@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 struct socket *sock;
 
 mutex_lock(&vq->mutex);
-sock = vq->private_data;
+sock = rcu_dereference_protected(vq->private_data,
+lockdep_is_held(&vq->mutex));
 vhost_net_disable_vq(n, vq);
 rcu_assign_pointer(vq->private_data, NULL);
 mutex_unlock(&vq->mutex);
@@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 }
 
 /* start polling new socket */
-oldsock = vq->private_data;
+oldsock = rcu_dereference_protected(vq->private_data,
+lockdep_is_held(&vq->mutex));
 if (sock != oldsock) {
 vhost_net_disable_vq(n, vq);
 rcu_assign_pointer(vq->private_data, sock);
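
The conversions above follow a consistent pattern: rcu_dereference_check() is used where a path may run either as an RCU-style reader or with the update-side mutex held, while rcu_dereference_protected() is reserved for paths that always hold the mutex and therefore need no memory barrier. A hedged sketch with hypothetical names (struct conn, ->backend, ->lock are not vhost's types):

    #include <linux/lockdep.h>
    #include <linux/mutex.h>
    #include <linux/rcupdate.h>

    struct conn {
            void __rcu *backend;    /* readers use RCU; writers hold ->lock */
            struct mutex lock;
    };

    /* May be called as a reader or with c->lock held. */
    static void *conn_backend_any(struct conn *c)
    {
            return rcu_dereference_check(c->backend, lockdep_is_held(&c->lock));
    }

    /* Update side only: caller must hold c->lock. */
    static void *conn_backend_locked(struct conn *c)
    {
            return rcu_dereference_protected(c->backend, lockdep_is_held(&c->lock));
    }
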
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e05557d5299..b5c49478d20 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -284,7 +284,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 vhost_dev_cleanup(dev);
 
 memory->nregions = 0;
-dev->memory = memory;
+RCU_INIT_POINTER(dev->memory, memory);
 return 0;
 }
 
@@ -316,8 +316,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 fput(dev->log_file);
 dev->log_file = NULL;
 /* No one will access memory at this point */
-kfree(dev->memory);
-dev->memory = NULL;
+kfree(rcu_dereference_protected(dev->memory,
+lockdep_is_held(&dev->mutex)));
+RCU_INIT_POINTER(dev->memory, NULL);
 if (dev->mm)
 mmput(dev->mm);
 dev->mm = NULL;
@@ -401,14 +402,22 @@ static int vq_access_ok(unsigned int num,
 /* Caller should have device mutex but not vq mutex */
 int vhost_log_access_ok(struct vhost_dev *dev)
 {
-return memory_access_ok(dev, dev->memory, 1);
+struct vhost_memory *mp;
+
+mp = rcu_dereference_protected(dev->memory,
+lockdep_is_held(&dev->mutex));
+return memory_access_ok(dev, mp, 1);
 }
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
 static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
 {
-return vq_memory_access_ok(log_base, vq->dev->memory,
+struct vhost_memory *mp;
+
+mp = rcu_dereference_protected(vq->dev->memory,
+lockdep_is_held(&vq->mutex));
+return vq_memory_access_ok(log_base, mp,
 vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
 (!vq->log_used || log_access_ok(log_base, vq->log_addr,
 sizeof *vq->used +
@@ -448,7 +457,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 kfree(newmem);
 return -EFAULT;
 }
-oldmem = d->memory;
+oldmem = rcu_dereference_protected(d->memory,
+lockdep_is_held(&d->mutex));
 rcu_assign_pointer(d->memory, newmem);
 synchronize_rcu();
 kfree(oldmem);
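
The vhost_set_memory() hunk above is a compact instance of the canonical publish-then-reclaim sequence. A generic sketch, with struct dev_state and struct config as placeholders rather than vhost types:

    #include <linux/lockdep.h>
    #include <linux/mutex.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct config;                          /* opaque payload */

    struct dev_state {
            struct config __rcu *cfg;       /* readers: rcu_read_lock(); writers: ->mutex */
            struct mutex mutex;
    };

    static void dev_replace_cfg(struct dev_state *d, struct config *newcfg)
    {
            struct config *oldcfg;

            mutex_lock(&d->mutex);
            oldcfg = rcu_dereference_protected(d->cfg,
                                               lockdep_is_held(&d->mutex));
            rcu_assign_pointer(d->cfg, newcfg);     /* publish the new version */
            mutex_unlock(&d->mutex);

            synchronize_rcu();                      /* wait out pre-existing readers */
            kfree(oldcfg);                          /* now safe to reclaim */
    }
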
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index afd77295971..af3c11ded5f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,7 +106,7 @@ struct vhost_virtqueue {
 * vhost_work execution acts instead of rcu_read_lock() and the end of
 * vhost_work execution acts instead of rcu_read_lock().
 * Writers use virtqueue mutex. */
-void *private_data;
+void __rcu *private_data;
 /* Log write descriptors */
 void __user *log_base;
 struct vhost_log log[VHOST_NET_MAX_SG];
@@ -116,7 +116,7 @@ struct vhost_dev {
 /* Readers use RCU to access memory table pointer
 * log base pointer and features.
 * Writers use mutex below.*/
-struct vhost_memory *memory;
+struct vhost_memory __rcu *memory;
 struct mm_struct *mm;
 struct mutex mutex;
 unsigned acked_features;
@@ -173,7 +173,11 @@ enum {
 
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
 {
-unsigned acked_features = rcu_dereference(dev->acked_features);
+unsigned acked_features;
+
+acked_features =
+rcu_dereference_index_check(dev->acked_features,
+lockdep_is_held(&dev->mutex));
 return acked_features & (1 << bit);
 }
 
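
vhost_has_feature() above also shows the one non-pointer accessor introduced by this series, rcu_dereference_index_check(), which skips the sparse address-space check so it can be applied to plain integers. A minimal sketch under the same assumptions, with a hypothetical struct feat_dev:

    #include <linux/lockdep.h>
    #include <linux/mutex.h>
    #include <linux/rcupdate.h>

    struct feat_dev {
            unsigned acked_features;        /* written under ->mutex, read locklessly */
            struct mutex mutex;
    };

    static inline int feat_enabled(struct feat_dev *dev, int bit)
    {
            unsigned acked;

            /* Non-pointer variant: rcu_dereference_check() would trip sparse here. */
            acked = rcu_dereference_index_check(dev->acked_features,
                                                lockdep_is_held(&dev->mutex));
            return acked & (1 << bit);
    }
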
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed3e92e41c6..3cb7d04308c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -75,7 +75,7 @@ struct cgroup_subsys_state {
 
 unsigned long flags;
 /* ID for this css, if possible */
-struct css_id *id;
+struct css_id __rcu *id;
 };
 
 /* bits in struct cgroup_subsys_state flags field */
@@ -205,7 +205,7 @@ struct cgroup {
 struct list_head children; /* my children */
 
 struct cgroup *parent; /* my parent */
-struct dentry *dentry; /* cgroup fs entry, RCU protected */
+struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
 
 /* Private pointers for each registered subsystem */
 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c1a62c56a66..320d6c94ff8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -16,7 +16,11 @@
 # define __release(x) __context__(x,-1)
 # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu __attribute__((noderef, address_space(3)))
+#ifdef CONFIG_SPARSE_RCU_POINTER
+# define __rcu __attribute__((noderef, address_space(4)))
+#else
 # define __rcu
+#endif
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
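
With CONFIG_SPARSE_RCU_POINTER=y, the new __rcu annotation places RCU-protected pointers in their own sparse address space, so a plain load is flagged and only the rcu_dereference() family (which __force-casts back to the kernel address space) is accepted. A rough sketch of what sparse would and would not accept, using invented types:

    #include <linux/rcupdate.h>

    struct foo;

    struct holder {
            struct foo __rcu *ptr;          /* noderef, address_space(4) under sparse */
    };

    static struct foo *get_foo_good(struct holder *h)
    {
            return rcu_dereference(h->ptr); /* OK: strips the __rcu address space */
    }

    /*
     * static struct foo *get_foo_bad(struct holder *h)
     * {
     *         return h->ptr;               -- sparse: incompatible address spaces
     * }
     */
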
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4d2c39573f3..4aaeab37644 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -84,7 +84,7 @@ struct thread_group_cred {
 atomic_t usage;
 pid_t tgid; /* thread group process ID */
 spinlock_t lock;
-struct key *session_keyring; /* keyring inherited over fork */
+struct key __rcu *session_keyring; /* keyring inherited over fork */
 struct key *process_keyring; /* keyring private to this process */
 struct rcu_head rcu; /* RCU deletion hook */
 };
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index f59ed297b66..133c0ba25e3 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -31,7 +31,7 @@ struct embedded_fd_set {
 
 struct fdtable {
 unsigned int max_fds;
-struct file ** fd; /* current fd array */
+struct file __rcu **fd; /* current fd array */
 fd_set *close_on_exec;
 fd_set *open_fds;
 struct rcu_head rcu;
@@ -46,7 +46,7 @@ struct files_struct {
 * read mostly part
 */
 atomic_t count;
-struct fdtable *fdt;
+struct fdtable __rcu *fdt;
 struct fdtable fdtab;
 /*
 * written part on a separate cache line in SMP
@@ -55,7 +55,7 @@ struct files_struct {
 int next_fd;
 struct embedded_fd_set close_on_exec_init;
 struct embedded_fd_set open_fds_init;
-struct file * fd_array[NR_OPEN_DEFAULT];
+struct file __rcu * fd_array[NR_OPEN_DEFAULT];
 };
 
 #define rcu_dereference_check_fdtable(files, fdtfd) \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76041b61475..aa3dc8d2043 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1380,7 +1380,7 @@ struct super_block {
 * Saved mount options for lazy filesystems using
 * generic_show_options()
 */
-char *s_options;
+char __rcu *s_options;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5f2f4c4d8fb..af3f06b41dc 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter {
 struct disk_part_tbl {
 struct rcu_head rcu_head;
 int len;
-struct hd_struct *last_lookup;
-struct hd_struct *part[];
+struct hd_struct __rcu *last_lookup;
+struct hd_struct __rcu *part[];
 };
 
 struct gendisk {
@@ -149,7 +149,7 @@ struct gendisk {
 * non-critical accesses use RCU. Always access through
 * helpers.
 */
-struct disk_part_tbl *part_tbl;
+struct disk_part_tbl __rcu *part_tbl;
 struct hd_struct part0;
 
 const struct block_device_operations *fops;
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669da..1f4517d55b1 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
 #endif
 
 #if defined(CONFIG_NO_HZ)
-#if defined(CONFIG_TINY_RCU)
+#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 extern void rcu_enter_nohz(void);
 extern void rcu_exit_nohz(void);
 
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e968db71e33..cdb715e58e3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -50,14 +50,14 @@
 
 struct idr_layer {
 unsigned long bitmap; /* A zero bit means "space here" */
-struct idr_layer *ary[1<<IDR_BITS];
+struct idr_layer __rcu *ary[1<<IDR_BITS];
 int count; /* When zero, we can release it */
 int layer; /* distance from leaf */
 struct rcu_head rcu_head;
 };
 
 struct idr {
-struct idr_layer *top;
+struct idr_layer __rcu *top;
 struct idr_layer *id_free;
 int layers; /* only valid without concurrent changes */
 int id_free_cnt;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1f43fa56f60..2fea6c8ef6b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -82,11 +82,17 @@ extern struct group_info init_groups;
 # define CAP_INIT_BSET CAP_FULL_SET
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
+#define INIT_TASK_RCU_TREE_PREEMPT() \
+.rcu_blocked_node = NULL,
+#else
+#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
+#endif
+#ifdef CONFIG_PREEMPT_RCU
 #define INIT_TASK_RCU_PREEMPT(tsk) \
 .rcu_read_lock_nesting = 0, \
 .rcu_read_unlock_special = 0, \
-.rcu_blocked_node = NULL, \
-.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),
+.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
+INIT_TASK_RCU_TREE_PREEMPT()
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif
@@ -137,8 +143,8 @@ extern struct cred init_cred;
 .children = LIST_HEAD_INIT(tsk.children), \
 .sibling = LIST_HEAD_INIT(tsk.sibling), \
 .group_leader = &tsk, \
-.real_cred = &init_cred, \
-.cred = &init_cred, \
+RCU_INIT_POINTER(.real_cred, &init_cred), \
+RCU_INIT_POINTER(.cred, &init_cred), \
 .cred_guard_mutex = \
 __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \
 .comm = "swapper", \
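
The init_task and vhost hunks both switch plain assignments to RCU_INIT_POINTER() where no reader can yet observe the structure; unlike rcu_assign_pointer(), it performs no ordering, so it is only appropriate for initialization (or clearing) paths. A hedged sketch with invented types:

    #include <linux/rcupdate.h>

    struct cfg;

    struct box {
            struct cfg __rcu *cfg;
    };

    static void box_init(struct box *b, struct cfg *initial)
    {
            /* No readers can see 'b' yet, so no ordering is required. */
            RCU_INIT_POINTER(b->cfg, initial);
    }

    static void box_update(struct box *b, struct cfg *next)
    {
            /*
             * Readers may be running: rcu_assign_pointer() orders the
             * initialization of *next before publication of the pointer.
             */
            rcu_assign_pointer(b->cfg, next);
    }
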
diff --git a/include/linux/input.h b/include/linux/input.h
index 896a92227bc..d6ae1761be9 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1196,7 +1196,7 @@ struct input_dev {
 int (*flush)(struct input_dev *dev, struct file *file);
 int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value);
 
-struct input_handle *grab;
+struct input_handle __rcu *grab;
 
 spinlock_t event_lock;
 struct mutex mutex;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 64d52913303..3e70b21884a 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -53,7 +53,7 @@ struct io_context {
 
 struct radix_tree_root radix_root;
 struct hlist_head cic_list;
-void *ioc_data;
+void __rcu *ioc_data;
 };
 
 static inline struct io_context *ioc_task_link(struct io_context *ioc)
diff --git a/include/linux/key.h b/include/linux/key.h
index cd50dfa1d4c..3db0adce1fd 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -178,8 +178,9 @@ struct key {
 */
 union {
 unsigned long value;
+void __rcu *rcudata;
 void *data;
-struct keyring_list *subscriptions;
+struct keyring_list __rcu *subscriptions;
 } payload;
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c13cc48697a..ac740b26eb1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -205,7 +205,7 @@ struct kvm {
 
 struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-struct kvm_irq_routing_table *irq_routing;
+struct kvm_irq_routing_table __rcu *irq_routing;
 struct hlist_head mask_notifier_list;
 struct hlist_head irq_ack_notifier_list;
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ee7e258627f..cb57d657ce4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -299,7 +299,7 @@ struct mm_struct {
 * new_owner->mm == mm
 * new_owner->alloc_lock is held
 */
-struct task_struct *owner;
+struct task_struct __rcu *owner;
 #endif
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 508f8cf6da3..d0edf7d823a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,7 +185,7 @@ struct nfs_inode {
 struct nfs4_cached_acl *nfs4_acl;
 /* NFSv4 state */
 struct list_head open_states;
-struct nfs_delegation *delegation;
+struct nfs_delegation __rcu *delegation;
 fmode_t delegation_state;
 struct rw_semaphore rwsem;
 #endif /* CONFIG_NFS_V4*/
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index b2f1a4d8355..2026f9e1ceb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -49,28 +49,28 @@
 
 struct notifier_block {
 int (*notifier_call)(struct notifier_block *, unsigned long, void *);
-struct notifier_block *next;
+struct notifier_block __rcu *next;
 int priority;
 };
 
 struct atomic_notifier_head {
 spinlock_t lock;
-struct notifier_block *head;
+struct notifier_block __rcu *head;
 };
 
 struct blocking_notifier_head {
 struct rw_semaphore rwsem;
-struct notifier_block *head;
+struct notifier_block __rcu *head;
 };
 
 struct raw_notifier_head {
-struct notifier_block *head;
+struct notifier_block __rcu *head;
 };
 
 struct srcu_notifier_head {
 struct mutex mutex;
 struct srcu_struct srcu;
-struct notifier_block *head;
+struct notifier_block __rcu *head;
 };
 
 #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
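
The notifier chain heads gain __rcu because the chains are walked under RCU (or SRCU) while registration takes the head's lock or semaphore; callers are unaffected. A brief, hedged usage example with an atomic chain (the demo names are made up):

    #include <linux/notifier.h>

    static ATOMIC_NOTIFIER_HEAD(demo_chain);

    static int demo_event(struct notifier_block *nb, unsigned long action, void *data)
    {
            /* Called with RCU read-side protection while the chain is walked. */
            return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = {
            .notifier_call = demo_event,
    };

    static void demo_notify(void)
    {
            atomic_notifier_chain_register(&demo_chain, &demo_nb);
            atomic_notifier_call_chain(&demo_chain, 0, NULL);
            atomic_notifier_chain_unregister(&demo_chain, &demo_nb);
    }
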
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 634b8e674ac..a39cbed9ee1 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr)
 {
 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
 }
+#define radix_tree_indirect_to_ptr(ptr) \
+radix_tree_indirect_to_ptr((void __force *)(ptr))
 
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
@@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr)
 struct radix_tree_root {
 unsigned int height;
 gfp_t gfp_mask;
-struct radix_tree_node *rnode;
+struct radix_tree_node __rcu *rnode;
 };
 
 #define RADIX_TREE_INIT(mask) { \
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4ec3b38ce9c..f31ef61f1c6 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -10,6 +10,21 @@
 #include <linux/rcupdate.h>
 
 /*
+ * Why is there no list_empty_rcu()? Because list_empty() serves this
+ * purpose. The list_empty() function fetches the RCU-protected pointer
+ * and compares it to the address of the list head, but neither dereferences
+ * this pointer itself nor provides this pointer to the caller. Therefore,
+ * it is not necessary to use rcu_dereference(), so that list_empty() can
+ * be used anywhere you would want to use a list_empty_rcu().
+ */
+
+/*
+ * return the ->next pointer of a list_head in an rcu safe
+ * way, we must not access it directly
+ */
+#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
+
+/*
  * Insert a new entry between two known consecutive entries.
  *
  * This is only for internal list manipulation where we know
@@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new,
 {
 new->next = next;
 new->prev = prev;
-rcu_assign_pointer(prev->next, new);
+rcu_assign_pointer(list_next_rcu(prev), new);
 next->prev = new;
 }
 
@@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old,
 {
 new->next = old->next;
 new->prev = old->prev;
-rcu_assign_pointer(new->prev->next, new);
+rcu_assign_pointer(list_next_rcu(new->prev), new);
 new->next->prev = new;
 old->prev = LIST_POISON2;
 }
@@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 */
 
 last->next = at;
-rcu_assign_pointer(head->next, first);
+rcu_assign_pointer(list_next_rcu(head), first);
 first->prev = head;
 at->prev = last;
 }
@@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
 */
 #define list_entry_rcu(ptr, type, member) \
-container_of(rcu_dereference_raw(ptr), type, member)
+({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \
+container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
+})
 
 /**
 * list_first_entry_rcu - get the first element from a list
@@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
 list_entry_rcu((ptr)->next, type, member)
 
 #define __list_for_each_rcu(pos, head) \
-for (pos = rcu_dereference_raw((head)->next); \
+for (pos = rcu_dereference_raw(list_next_rcu(head)); \
 pos != (head); \
-pos = rcu_dereference_raw(pos->next))
+pos = rcu_dereference_raw(list_next_rcu((pos)))
 
 /**
 * list_for_each_entry_rcu - iterate over rcu list of given type
@@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
 * as long as the traversal is guarded by rcu_read_lock().
 */
 #define list_for_each_continue_rcu(pos, head) \
-for ((pos) = rcu_dereference_raw((pos)->next); \
+for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
 prefetch((pos)->next), (pos) != (head); \
-(pos) = rcu_dereference_raw((pos)->next))
+(pos) = rcu_dereference_raw(list_next_rcu(pos)))
 
 /**
 * list_for_each_entry_continue_rcu - continue iteration over list of given type
@@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
 
 new->next = next;
 new->pprev = old->pprev;
-rcu_assign_pointer(*new->pprev, new);
+rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new);
 if (next)
 new->next->pprev = &new->next;
 old->pprev = LIST_POISON2;
 }
 
+/*
+ * return the first or the next element in an RCU protected hlist
+ */
+#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
+#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
+#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
+
 /**
 * hlist_add_head_rcu
 * @n: the element to add to the hash list.
@@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n,
 
 n->next = first;
 n->pprev = &h->first;
-rcu_assign_pointer(h->first, n);
+rcu_assign_pointer(hlist_first_rcu(h), n);
 if (first)
 first->pprev = &n->next;
 }
@@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
 {
 n->pprev = next->pprev;
 n->next = next;
-rcu_assign_pointer(*(n->pprev), n);
+rcu_assign_pointer(hlist_pprev_rcu(n), n);
 next->pprev = &n->next;
 }
 
@@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 {
 n->next = prev->next;
 n->pprev = &prev->next;
-rcu_assign_pointer(prev->next, n);
+rcu_assign_pointer(hlist_next_rcu(prev), n);
 if (n->next)
 n->next->pprev = &n->next;
 }
 
 #define __hlist_for_each_rcu(pos, head) \
-for (pos = rcu_dereference((head)->first); \
+for (pos = rcu_dereference(hlist_first_rcu(head)); \
 pos && ({ prefetch(pos->next); 1; }); \
-pos = rcu_dereference(pos->next))
+pos = rcu_dereference(hlist_next_rcu(pos)))
 
 /**
 * hlist_for_each_entry_rcu - iterate over rcu list of given type
@@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
 * as long as the traversal is guarded by rcu_read_lock().
 */
 #define hlist_for_each_entry_rcu(tpos, pos, head, member) \
-for (pos = rcu_dereference_raw((head)->first); \
+for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \
 pos && ({ prefetch(pos->next); 1; }) && \
 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
-pos = rcu_dereference_raw(pos->next))
+pos = rcu_dereference_raw(hlist_next_rcu(pos)))
 
 /**
 * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
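
None of the list macros change behaviour here; list_next_rcu(), hlist_first_rcu() and friends only hand rcu_assign_pointer()/rcu_dereference_raw() a properly __rcu-annotated lvalue. Callers keep the usual pattern, sketched below with an invented struct item:

    #include <linux/rculist.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct item {
            int id;
            struct list_head link;
            struct rcu_head rcu;
    };

    static LIST_HEAD(items);
    static DEFINE_SPINLOCK(items_lock);

    static void item_add(struct item *it)
    {
            spin_lock(&items_lock);
            list_add_rcu(&it->link, &items);        /* publication via list_next_rcu() */
            spin_unlock(&items_lock);
    }

    /* Caller must hold rcu_read_lock() across the lookup and any use of the result. */
    static struct item *item_find(int id)
    {
            struct item *it;

            list_for_each_entry_rcu(it, &items, link)
                    if (it->id == id)
                            return it;
            return NULL;
    }
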
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index b70ffe53cb9..2ae13714828 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
 }
 }
 
+#define hlist_nulls_first_rcu(head) \
+(*((struct hlist_nulls_node __rcu __force **)&(head)->first))
+
+#define hlist_nulls_next_rcu(node) \
+(*((struct hlist_nulls_node __rcu __force **)&(node)->next))
+
 /**
 * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
 * @n: the element to delete from the hash list.
@@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 
 n->next = first;
 n->pprev = &h->first;
-rcu_assign_pointer(h->first, n);
+rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
 if (!is_a_nulls(first))
 first->pprev = &n->next;
 }
@@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 * @member: the name of the hlist_nulls_node within the struct.
 *
 */
 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
-for (pos = rcu_dereference_raw((head)->first); \
+for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
 (!is_a_nulls(pos)) && \
 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
-pos = rcu_dereference_raw(pos->next))
+pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
 
 #endif
 #endif
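
hlist_nulls_for_each_entry_rcu() keeps the same shape after this change; a lookup loop ends when it reaches the nulls marker rather than a NULL pointer, which matters for SLAB_DESTROY_BY_RCU users. A sketch with invented names:

    #include <linux/rculist_nulls.h>

    struct ent {
            struct hlist_nulls_node node;
            unsigned int key;
    };

    /* Call under rcu_read_lock(); 'head' is one bucket of a nulls hash table. */
    static struct ent *ent_lookup(struct hlist_nulls_head *head, unsigned int key)
    {
            struct ent *e;
            struct hlist_nulls_node *pos;

            hlist_nulls_for_each_entry_rcu(e, pos, head, node)
                    if (e->key == key)
                            return e;       /* caller re-validates if slab is DESTROY_BY_RCU */
            return NULL;                    /* reached the nulls marker: not in this chain */
    }
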
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fbc54a2585..89414d67d96 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,11 +41,15 @@
 #include <linux/lockdep.h>
 #include <linux/completion.h>
 #include <linux/debugobjects.h>
+#include <linux/compiler.h>
 
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
+#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+
 /**
 * struct rcu_head - callback structure for use with RCU
 * @next: next update requests in a list
@@ -57,29 +61,94 @@ struct rcu_head {
 };
 
 /* Exported common interfaces */
-extern void rcu_barrier(void);
+extern void call_rcu_sched(struct rcu_head *head,
+void (*func)(struct rcu_head *rcu));
+extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
 extern void synchronize_sched_expedited(void);
 extern int sched_expedited_torture_stats(char *page);
 
+static inline void __rcu_read_lock_bh(void)
+{
+local_bh_disable();
+}
+
+static inline void __rcu_read_unlock_bh(void)
+{
+local_bh_enable();
+}
+
+#ifdef CONFIG_PREEMPT_RCU
+
+extern void __rcu_read_lock(void);
+extern void __rcu_read_unlock(void);
+void synchronize_rcu(void);
+
+/*
+ * Defined as a macro as it is a very low level header included from
+ * areas that don't even know about current. This gives the rcu_read_lock()
+ * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
+ * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
+ */
+#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+static inline void __rcu_read_lock(void)
+{
+preempt_disable();
+}
+
+static inline void __rcu_read_unlock(void)
+{
+preempt_enable();
+}
+
+static inline void synchronize_rcu(void)
+{
+synchronize_sched();
+}
+
+static inline int rcu_preempt_depth(void)
+{
+return 0;
+}
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
 /* Internal to kernel */
 extern void rcu_init(void);
+extern void rcu_sched_qs(int cpu);
+extern void rcu_bh_qs(int cpu);
+extern void rcu_check_callbacks(int cpu, int user);
+struct notifier_block;
+
+#ifdef CONFIG_NO_HZ
+
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static inline void rcu_enter_nohz(void)
+{
+}
+
+static inline void rcu_exit_nohz(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
-#elif defined(CONFIG_TINY_RCU)
+#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 #include <linux/rcutiny.h>
 #else
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
 
-#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
-#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
-#define INIT_RCU_HEAD(ptr) do { \
-(ptr)->next = NULL; (ptr)->func = NULL; \
-} while (0)
-
 /*
 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
 * initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map;
 extern int debug_lockdep_rcu_enabled(void);
 
 /**
- * rcu_read_lock_held - might we be in RCU read-side critical section?
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
 * this assumes we are in an RCU read-side critical section unless it can
- * prove otherwise.
+ * prove otherwise. This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
 *
- * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 */
 static inline int rcu_read_lock_held(void)
@@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void)
 extern int rcu_read_lock_bh_held(void);
 
 /**
- * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
+ * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
 * RCU-sched read-side critical section. In absence of
 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
 * critical section unless it can prove otherwise. Note that disabling
 * of preemption (including disabling irqs) counts as an RCU-sched
- * read-side critical section.
+ * read-side critical section. This is useful for debug checks in functions
+ * that required that they be called within an RCU-sched read-side
+ * critical section.
 *
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
@@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void)
 
 extern int rcu_my_thread_group_empty(void);
 
-#define __do_rcu_dereference_check(c) \
+/**
+ * rcu_lockdep_assert - emit lockdep splat if specified condition not met
+ * @c: condition to check
+ */
+#define rcu_lockdep_assert(c) \
 do { \
 static bool __warned; \
 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
@@ -220,41 +296,155 @@ extern int rcu_my_thread_group_empty(void);
 } \
 } while (0)
 
+#else /* #ifdef CONFIG_PROVE_RCU */
+
+#define rcu_lockdep_assert(c) do { } while (0)
+
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
+
+/*
+ * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
+ * and rcu_assign_pointer(). Some of these could be folded into their
+ * callers, but they are left separate in order to ease introduction of
+ * multiple flavors of pointers to match the multiple flavors of RCU
+ * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in
+ * the future.
+ */
+#define __rcu_access_pointer(p, space) \
+({ \
+typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
+(void) (((typeof (*p) space *)p) == p); \
+((typeof(*p) __force __kernel *)(_________p1)); \
+})
+#define __rcu_dereference_check(p, c, space) \
+({ \
+typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
+rcu_lockdep_assert(c); \
+(void) (((typeof (*p) space *)p) == p); \
+smp_read_barrier_depends(); \
+((typeof(*p) __force __kernel *)(_________p1)); \
+})
+#define __rcu_dereference_protected(p, c, space) \
+({ \
+rcu_lockdep_assert(c); \
+(void) (((typeof (*p) space *)p) == p); \
+((typeof(*p) __force __kernel *)(p)); \
+})
+
+#define __rcu_dereference_index_check(p, c) \
+({ \
+typeof(p) _________p1 = ACCESS_ONCE(p); \
+rcu_lockdep_assert(c); \
+smp_read_barrier_depends(); \
+(_________p1); \
+})
+#define __rcu_assign_pointer(p, v, space) \
+({ \
+if (!__builtin_constant_p(v) || \
+((v) != NULL)) \
+smp_wmb(); \
+(p) = (typeof(*v) __force space *)(v); \
+})
+
+
 /**
- * rcu_dereference_check - rcu_dereference with debug checking
+ * rcu_access_pointer() - fetch RCU pointer with no dereferencing
+ * @p: The pointer to read
+ *
+ * Return the value of the specified RCU-protected pointer, but omit the
+ * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
+ * when the value of this pointer is accessed, but the pointer is not
+ * dereferenced, for example, when testing an RCU-protected pointer against
+ * NULL. Although rcu_access_pointer() may also be used in cases where
+ * update-side locks prevent the value of the pointer from changing, you
+ * should instead use rcu_dereference_protected() for this use case.
+ */
+#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
+
+/**
+ * rcu_dereference_check() - rcu_dereference with debug checking
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * Do an rcu_dereference(), but check that the conditions under which the
- * dereference will take place are correct. Typically the conditions indicate
- * the various locking conditions that should be held at that point. The check
- * should return true if the conditions are satisfied.
+ * dereference will take place are correct. Typically the conditions
+ * indicate the various locking conditions that should be held at that
+ * point. The check should return true if the conditions are satisfied.
+ * An implicit check for being in an RCU read-side critical section
+ * (rcu_read_lock()) is included.
 *
 * For example:
 *
- * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
- * lockdep_is_held(&foo->lock));
+ * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
 *
 * could be used to indicate to lockdep that foo->bar may only be dereferenced
- * if either the RCU read lock is held, or that the lock required to replace
+ * if either rcu_read_lock() is held, or that the lock required to replace
 * the bar struct at foo->bar is held.
 *
 * Note that the list of conditions may also include indications of when a lock
 * need not be held, for example during initialisation or destruction of the
 * target struct:
 *
- * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
- * lockdep_is_held(&foo->lock) ||
+ * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
 * atomic_read(&foo->usage) == 0);
+ *
+ * Inserts memory barriers on architectures that require them
+ * (currently only the Alpha), prevents the compiler from refetching
+ * (and from merging fetches), and, more importantly, documents exactly
+ * which pointers are protected by RCU and checks that the pointer is
+ * annotated as __rcu.
 */
 #define rcu_dereference_check(p, c) \
-({ \
-__do_rcu_dereference_check(c); \
-rcu_dereference_raw(p); \
-})
+__rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
 
 /**
- * rcu_dereference_protected - fetch RCU pointer when updates prevented
+ * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-bh counterpart to rcu_dereference_check().
+ */
+#define rcu_dereference_bh_check(p, c) \
+__rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
+
+/**
+ * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-sched counterpart to rcu_dereference_check().
+ */
+#define rcu_dereference_sched_check(p, c) \
+__rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
+__rcu)
+
+#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
+
+/**
+ * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * Similar to rcu_dereference_check(), but omits the sparse checking.
+ * This allows rcu_dereference_index_check() to be used on integers,
+ * which can then be used as array indices. Attempting to use
+ * rcu_dereference_check() on an integer will give compiler warnings
+ * because the sparse address-space mechanism relies on dereferencing
+ * the RCU-protected pointer. Dereferencing integers is not something
+ * that even gcc will put up with.
+ *
+ * Note that this function does not implicitly check for RCU read-side
+ * critical sections. If this function gains lots of uses, it might
+ * make sense to provide versions for each flavor of RCU, but it does
+ * not make sense as of early 2010.
+ */
+#define rcu_dereference_index_check(p, c) \
+__rcu_dereference_index_check((p), (c))
+
+/**
+ * rcu_dereference_protected() - fetch RCU pointer when updates prevented
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
@@ -263,35 +453,61 @@ extern int rcu_my_thread_group_empty(void);
263 * prevent the compiler from repeating this reference or combining it 453 * prevent the compiler from repeating this reference or combining it
264 * with other references, so it should not be used without protection 454 * with other references, so it should not be used without protection
265 * of appropriate locks. 455 * of appropriate locks.
456 *
457 * This function is only for update-side use. Using this function
458 * when protected only by rcu_read_lock() will result in infrequent
459 * but very ugly failures.
266 */ 460 */
267#define rcu_dereference_protected(p, c) \ 461#define rcu_dereference_protected(p, c) \
268 ({ \ 462 __rcu_dereference_protected((p), (c), __rcu)
269 __do_rcu_dereference_check(c); \
270 (p); \
271 })
272 463
273#else /* #ifdef CONFIG_PROVE_RCU */ 464/**
465 * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
466 * @p: The pointer to read, prior to dereferencing
467 * @c: The conditions under which the dereference will take place
468 *
469 * This is the RCU-bh counterpart to rcu_dereference_protected().
470 */
471#define rcu_dereference_bh_protected(p, c) \
472 __rcu_dereference_protected((p), (c), __rcu)
274 473
275#define rcu_dereference_check(p, c) rcu_dereference_raw(p) 474/**
276#define rcu_dereference_protected(p, c) (p) 475 * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
476 * @p: The pointer to read, prior to dereferencing
477 * @c: The conditions under which the dereference will take place
478 *
479 * This is the RCU-sched counterpart to rcu_dereference_protected().
480 */
481#define rcu_dereference_sched_protected(p, c) \
482 __rcu_dereference_protected((p), (c), __rcu)
277 483
278#endif /* #else #ifdef CONFIG_PROVE_RCU */
279 484
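Continuing the hypothetical gbl_foo/my_lock sketch from above, the update side uses rcu_dereference_protected() because my_lock excludes all changes to the pointer, so neither ACCESS_ONCE() nor a memory barrier is needed:

	static void set_a(int new_a)
	{
		struct foo *p;

		spin_lock(&my_lock);
		p = rcu_dereference_protected(gbl_foo, lockdep_is_held(&my_lock));
		if (p)
			p->a = new_a;
		spin_unlock(&my_lock);
	}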
280/** 485/**
281 * rcu_access_pointer - fetch RCU pointer with no dereferencing 486 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
487 * @p: The pointer to read, prior to dereferencing
282 * 488 *
283 * Return the value of the specified RCU-protected pointer, but omit the 489 * This is a simple wrapper around rcu_dereference_check().
284 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 490 */
285 * when the value of this pointer is accessed, but the pointer is not 491#define rcu_dereference(p) rcu_dereference_check(p, 0)
286 * dereferenced, for example, when testing an RCU-protected pointer against 492
287 * NULL. This may also be used in cases where update-side locks prevent 493/**
288 * the value of the pointer from changing, but rcu_dereference_protected() 494 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
289 * is a lighter-weight primitive for this use case. 495 * @p: The pointer to read, prior to dereferencing
496 *
497 * Makes rcu_dereference_check() do the dirty work.
498 */
499#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
500
501/**
502 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
503 * @p: The pointer to read, prior to dereferencing
504 *
505 * Makes rcu_dereference_check() do the dirty work.
290 */ 506 */
291#define rcu_access_pointer(p) ACCESS_ONCE(p) 507#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
292 508
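A sketch contrasting the two read-side forms (again using the hypothetical gbl_foo): rcu_access_pointer() suffices when the value is only compared against NULL, while rcu_dereference() is needed when the pointer will actually be followed:

	static bool foo_is_registered(void)
	{
		return rcu_access_pointer(gbl_foo) != NULL;	/* pointer never dereferenced */
	}

	static int foo_read_a(void)
	{
		struct foo *p;
		int a = -1;

		rcu_read_lock();
		p = rcu_dereference(gbl_foo);			/* will be dereferenced */
		if (p)
			a = p->a;
		rcu_read_unlock();
		return a;
	}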
293/** 509/**
294 * rcu_read_lock - mark the beginning of an RCU read-side critical section. 510 * rcu_read_lock() - mark the beginning of an RCU read-side critical section
295 * 511 *
296 * When synchronize_rcu() is invoked on one CPU while other CPUs 512 * When synchronize_rcu() is invoked on one CPU while other CPUs
297 * are within RCU read-side critical sections, then the 513 * are within RCU read-side critical sections, then the
@@ -302,7 +518,7 @@ extern int rcu_my_thread_group_empty(void);
 302 * until after all the other CPUs exit their critical sections. 518 * until after all the other CPUs exit their critical sections.
303 * 519 *
304 * Note, however, that RCU callbacks are permitted to run concurrently 520 * Note, however, that RCU callbacks are permitted to run concurrently
305 * with RCU read-side critical sections. One way that this can happen 521 * with new RCU read-side critical sections. One way that this can happen
306 * is via the following sequence of events: (1) CPU 0 enters an RCU 522 * is via the following sequence of events: (1) CPU 0 enters an RCU
307 * read-side critical section, (2) CPU 1 invokes call_rcu() to register 523 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
308 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, 524 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
@@ -317,7 +533,20 @@ extern int rcu_my_thread_group_empty(void);
317 * will be deferred until the outermost RCU read-side critical section 533 * will be deferred until the outermost RCU read-side critical section
318 * completes. 534 * completes.
319 * 535 *
320 * It is illegal to block while in an RCU read-side critical section. 536 * You can avoid reading and understanding the next paragraph by
537 * following this rule: don't put anything in an rcu_read_lock() RCU
538 * read-side critical section that would block in a !PREEMPT kernel.
539 * But if you want the full story, read on!
540 *
541 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
542 * is illegal to block while in an RCU read-side critical section. In
543 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
544 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
545 * be preempted, but explicit blocking is illegal. Finally, in preemptible
546 * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
547 * RCU read-side critical sections may be preempted and they may also
548 * block, but only when acquiring spinlocks that are subject to priority
549 * inheritance.
321 */ 550 */
322static inline void rcu_read_lock(void) 551static inline void rcu_read_lock(void)
323{ 552{
@@ -337,7 +566,7 @@ static inline void rcu_read_lock(void)
337 */ 566 */
338 567
339/** 568/**
340 * rcu_read_unlock - marks the end of an RCU read-side critical section. 569 * rcu_read_unlock() - marks the end of an RCU read-side critical section.
341 * 570 *
342 * See rcu_read_lock() for more information. 571 * See rcu_read_lock() for more information.
343 */ 572 */
@@ -349,15 +578,16 @@ static inline void rcu_read_unlock(void)
349} 578}
350 579
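A reader-side sketch obeying the rule above (struct item and my_list are made up; list updates are assumed to be done with list_add_rcu()/list_del_rcu() under an update-side lock). Nothing between rcu_read_lock() and rcu_read_unlock() blocks:

	#include <linux/rculist.h>

	struct item {
		struct list_head list;
		int key;
		int val;
	};
	static LIST_HEAD(my_list);

	static int lookup_val(int key)
	{
		struct item *it;
		int val = -1;

		rcu_read_lock();
		list_for_each_entry_rcu(it, &my_list, list) {
			if (it->key == key) {
				val = it->val;
				break;
			}
		}
		rcu_read_unlock();
		return val;
	}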
351/** 580/**
352 * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section 581 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
353 * 582 *
 354 * This is the equivalent of rcu_read_lock(), but to be used when updates 583 * This is the equivalent of rcu_read_lock(), but to be used when updates
355 * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks 584 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
356 * consider completion of a softirq handler to be a quiescent state, 585 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
357 * a process in RCU read-side critical section must be protected by 586 * softirq handler to be a quiescent state, a process in RCU read-side
358 * disabling softirqs. Read-side critical sections in interrupt context 587 * critical section must be protected by disabling softirqs. Read-side
359 * can use just rcu_read_lock(). 588 * critical sections in interrupt context can use just rcu_read_lock(),
360 * 589 * though this should at least be commented to avoid confusing people
590 * reading the code.
361 */ 591 */
362static inline void rcu_read_lock_bh(void) 592static inline void rcu_read_lock_bh(void)
363{ 593{
@@ -379,13 +609,12 @@ static inline void rcu_read_unlock_bh(void)
379} 609}
380 610
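A matching reader sketch for the RCU-bh flavor (cur_entry is a made-up pointer, updated elsewhere with rcu_assign_pointer() and reclaimed via call_rcu_bh() or synchronize_rcu_bh()):

	struct entry {
		int data;
	};
	static struct entry __rcu *cur_entry;

	static int read_entry(void)
	{
		struct entry *e;
		int data = -1;

		rcu_read_lock_bh();		/* reader completes when the BH-disabled section ends */
		e = rcu_dereference_bh(cur_entry);
		if (e)
			data = e->data;
		rcu_read_unlock_bh();
		return data;
	}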
381/** 611/**
382 * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section 612 * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
383 * 613 *
 384 * Should be used with either 614 * This is the equivalent of rcu_read_lock(), but to be used when updates
385 * - synchronize_sched() 615 * are being done using call_rcu_sched() or synchronize_rcu_sched().
386 * or 616 * Read-side critical sections can also be introduced by anything that
387 * - call_rcu_sched() and rcu_barrier_sched() 617 * disables preemption, including local_irq_disable() and friends.
388 * on the write-side to insure proper synchronization.
389 */ 618 */
390static inline void rcu_read_lock_sched(void) 619static inline void rcu_read_lock_sched(void)
391{ 620{
@@ -420,54 +649,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
420 preempt_enable_notrace(); 649 preempt_enable_notrace();
421} 650}
422 651
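And the RCU-sched flavor, sketched with a hypothetical cur_sched_cfg pointer; the updater would pair this with rcu_assign_pointer() followed by synchronize_sched() (or call_rcu_sched()) before freeing the old structure:

	struct sched_cfg {
		int val;
	};
	static struct sched_cfg __rcu *cur_sched_cfg;

	static int read_sched_cfg(void)
	{
		struct sched_cfg *c;
		int val = -1;

		rcu_read_lock_sched();		/* also implied by any preemption-disabled region */
		c = rcu_dereference_sched(cur_sched_cfg);
		if (c)
			val = c->val;
		rcu_read_unlock_sched();
		return val;
	}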
423
424/**
425 * rcu_dereference_raw - fetch an RCU-protected pointer
426 *
427 * The caller must be within some flavor of RCU read-side critical
428 * section, or must be otherwise preventing the pointer from changing,
429 * for example, by holding an appropriate lock. This pointer may later
430 * be safely dereferenced. It is the caller's responsibility to have
431 * done the right thing, as this primitive does no checking of any kind.
432 *
433 * Inserts memory barriers on architectures that require them
434 * (currently only the Alpha), and, more importantly, documents
435 * exactly which pointers are protected by RCU.
436 */
437#define rcu_dereference_raw(p) ({ \
438 typeof(p) _________p1 = ACCESS_ONCE(p); \
439 smp_read_barrier_depends(); \
440 (_________p1); \
441 })
442
443/**
444 * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
445 *
446 * Makes rcu_dereference_check() do the dirty work.
447 */
448#define rcu_dereference(p) \
449 rcu_dereference_check(p, rcu_read_lock_held())
450
451/** 652/**
452 * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh 653 * rcu_assign_pointer() - assign to RCU-protected pointer
654 * @p: pointer to assign to
655 * @v: value to assign (publish)
453 * 656 *
454 * Makes rcu_dereference_check() do the dirty work. 657 * Assigns the specified value to the specified RCU-protected
455 */ 658 * pointer, ensuring that any concurrent RCU readers will see
456#define rcu_dereference_bh(p) \ 659 * any prior initialization. Returns the value assigned.
457 rcu_dereference_check(p, rcu_read_lock_bh_held())
458
459/**
460 * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
461 *
462 * Makes rcu_dereference_check() do the dirty work.
463 */
464#define rcu_dereference_sched(p) \
465 rcu_dereference_check(p, rcu_read_lock_sched_held())
466
467/**
468 * rcu_assign_pointer - assign (publicize) a pointer to a newly
469 * initialized structure that will be dereferenced by RCU read-side
470 * critical sections. Returns the value assigned.
471 * 660 *
472 * Inserts memory barriers on architectures that require them 661 * Inserts memory barriers on architectures that require them
473 * (pretty much all of them other than x86), and also prevents 662 * (pretty much all of them other than x86), and also prevents
@@ -476,14 +665,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
476 * call documents which pointers will be dereferenced by RCU read-side 665 * call documents which pointers will be dereferenced by RCU read-side
477 * code. 666 * code.
478 */ 667 */
479
480#define rcu_assign_pointer(p, v) \ 668#define rcu_assign_pointer(p, v) \
481 ({ \ 669 __rcu_assign_pointer((p), (v), __rcu)
482 if (!__builtin_constant_p(v) || \ 670
483 ((v) != NULL)) \ 671/**
484 smp_wmb(); \ 672 * RCU_INIT_POINTER() - initialize an RCU protected pointer
485 (p) = (v); \ 673 *
486 }) 674 * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep
675 * splats.
676 */
677#define RCU_INIT_POINTER(p, v) \
678 p = (typeof(*v) __force __rcu *)(v)
487 679
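A publication sketch (hypothetical names, reusing the struct foo/gbl_foo shape from the earlier sketches): the structure is fully initialized before rcu_assign_pointer() so that readers never see a half-built object, while RCU_INIT_POINTER() is reserved for cases where no reader can yet be running:

	#include <linux/init.h>
	#include <linux/slab.h>

	static int publish_foo(int a)
	{
		struct foo *new = kmalloc(sizeof(*new), GFP_KERNEL);

		if (!new)
			return -ENOMEM;
		new->a = a;				/* fully initialize ... */
		rcu_assign_pointer(gbl_foo, new);	/* ... then publish (implies smp_wmb()) */
		return 0;
	}

	static void __init foo_init(void)
	{
		RCU_INIT_POINTER(gbl_foo, NULL);	/* no readers yet, so no barrier needed */
	}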
488/* Infrastructure to implement the synchronize_() primitives. */ 680/* Infrastructure to implement the synchronize_() primitives. */
489 681
@@ -494,26 +686,37 @@ struct rcu_synchronize {
494 686
495extern void wakeme_after_rcu(struct rcu_head *head); 687extern void wakeme_after_rcu(struct rcu_head *head);
496 688
689#ifdef CONFIG_PREEMPT_RCU
690
497/** 691/**
498 * call_rcu - Queue an RCU callback for invocation after a grace period. 692 * call_rcu() - Queue an RCU callback for invocation after a grace period.
499 * @head: structure to be used for queueing the RCU updates. 693 * @head: structure to be used for queueing the RCU updates.
500 * @func: actual update function to be invoked after the grace period 694 * @func: actual callback function to be invoked after the grace period
501 * 695 *
502 * The update function will be invoked some time after a full grace 696 * The callback function will be invoked some time after a full grace
503 * period elapses, in other words after all currently executing RCU 697 * period elapses, in other words after all pre-existing RCU read-side
504 * read-side critical sections have completed. RCU read-side critical 698 * critical sections have completed. However, the callback function
699 * might well execute concurrently with RCU read-side critical sections
700 * that started after call_rcu() was invoked. RCU read-side critical
505 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 701 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
506 * and may be nested. 702 * and may be nested.
507 */ 703 */
508extern void call_rcu(struct rcu_head *head, 704extern void call_rcu(struct rcu_head *head,
509 void (*func)(struct rcu_head *head)); 705 void (*func)(struct rcu_head *head));
510 706
707#else /* #ifdef CONFIG_PREEMPT_RCU */
708
709/* In classic RCU, call_rcu() is just call_rcu_sched(). */
710#define call_rcu call_rcu_sched
711
712#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
713
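A deferred-free sketch tying call_rcu() to the usual update pattern (all names are made up): the old structure is handed to call_rcu() after being unpublished, and the callback runs only once all pre-existing readers have finished:

	#include <linux/slab.h>

	struct blob {
		int a;
		struct rcu_head rcu;
	};
	static struct blob __rcu *gbl_blob;
	static DEFINE_SPINLOCK(blob_lock);

	static void blob_reclaim(struct rcu_head *head)
	{
		kfree(container_of(head, struct blob, rcu));
	}

	static void update_blob(struct blob *new)
	{
		struct blob *old;

		spin_lock(&blob_lock);
		old = rcu_dereference_protected(gbl_blob, lockdep_is_held(&blob_lock));
		rcu_assign_pointer(gbl_blob, new);
		spin_unlock(&blob_lock);
		if (old)
			call_rcu(&old->rcu, blob_reclaim);
	}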
511/** 714/**
 512 * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period. 715 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
513 * @head: structure to be used for queueing the RCU updates. 716 * @head: structure to be used for queueing the RCU updates.
514 * @func: actual update function to be invoked after the grace period 717 * @func: actual callback function to be invoked after the grace period
515 * 718 *
516 * The update function will be invoked some time after a full grace 719 * The callback function will be invoked some time after a full grace
517 * period elapses, in other words after all currently executing RCU 720 * period elapses, in other words after all currently executing RCU
518 * read-side critical sections have completed. call_rcu_bh() assumes 721 * read-side critical sections have completed. call_rcu_bh() assumes
519 * that the read-side critical sections end on completion of a softirq 722 * that the read-side critical sections end on completion of a softirq
@@ -566,37 +769,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
566} 769}
567#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 770#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
568 771
569#ifndef CONFIG_PROVE_RCU
570#define __do_rcu_dereference_check(c) do { } while (0)
571#endif /* #ifdef CONFIG_PROVE_RCU */
572
573#define __rcu_dereference_index_check(p, c) \
574 ({ \
575 typeof(p) _________p1 = ACCESS_ONCE(p); \
576 __do_rcu_dereference_check(c); \
577 smp_read_barrier_depends(); \
578 (_________p1); \
579 })
580
581/**
582 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
583 * @p: The pointer to read, prior to dereferencing
584 * @c: The conditions under which the dereference will take place
585 *
586 * Similar to rcu_dereference_check(), but omits the sparse checking.
587 * This allows rcu_dereference_index_check() to be used on integers,
588 * which can then be used as array indices. Attempting to use
589 * rcu_dereference_check() on an integer will give compiler warnings
590 * because the sparse address-space mechanism relies on dereferencing
591 * the RCU-protected pointer. Dereferencing integers is not something
592 * that even gcc will put up with.
593 *
594 * Note that this function does not implicitly check for RCU read-side
595 * critical sections. If this function gains lots of uses, it might
596 * make sense to provide versions for each flavor of RCU, but it does
597 * not make sense as of early 2010.
598 */
599#define rcu_dereference_index_check(p, c) \
600 __rcu_dereference_index_check((p), (c))
601
602#endif /* __LINUX_RCUPDATE_H */ 772#endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e2e893144a8..13877cb93a6 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,103 +27,101 @@
27 27
28#include <linux/cache.h> 28#include <linux/cache.h>
29 29
30void rcu_sched_qs(int cpu); 30#define rcu_init_sched() do { } while (0)
31void rcu_bh_qs(int cpu);
32static inline void rcu_note_context_switch(int cpu)
33{
34 rcu_sched_qs(cpu);
35}
36 31
37#define __rcu_read_lock() preempt_disable() 32#ifdef CONFIG_TINY_RCU
38#define __rcu_read_unlock() preempt_enable()
39#define __rcu_read_lock_bh() local_bh_disable()
40#define __rcu_read_unlock_bh() local_bh_enable()
41#define call_rcu_sched call_rcu
42 33
43#define rcu_init_sched() do { } while (0) 34static inline void synchronize_rcu_expedited(void)
44extern void rcu_check_callbacks(int cpu, int user); 35{
36 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
37}
45 38
46static inline int rcu_needs_cpu(int cpu) 39static inline void rcu_barrier(void)
47{ 40{
48 return 0; 41 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
49} 42}
50 43
51/* 44#else /* #ifdef CONFIG_TINY_RCU */
52 * Return the number of grace periods. 45
53 */ 46void rcu_barrier(void);
54static inline long rcu_batches_completed(void) 47void synchronize_rcu_expedited(void);
48
49#endif /* #else #ifdef CONFIG_TINY_RCU */
50
51static inline void synchronize_rcu_bh(void)
55{ 52{
56 return 0; 53 synchronize_sched();
57} 54}
58 55
59/* 56static inline void synchronize_rcu_bh_expedited(void)
60 * Return the number of bottom-half grace periods.
61 */
62static inline long rcu_batches_completed_bh(void)
63{ 57{
64 return 0; 58 synchronize_sched();
65} 59}
66 60
67static inline void rcu_force_quiescent_state(void) 61#ifdef CONFIG_TINY_RCU
62
63static inline void rcu_preempt_note_context_switch(void)
68{ 64{
69} 65}
70 66
71static inline void rcu_bh_force_quiescent_state(void) 67static inline void exit_rcu(void)
72{ 68{
73} 69}
74 70
75static inline void rcu_sched_force_quiescent_state(void) 71static inline int rcu_needs_cpu(int cpu)
76{ 72{
73 return 0;
77} 74}
78 75
79extern void synchronize_sched(void); 76#else /* #ifdef CONFIG_TINY_RCU */
77
78void rcu_preempt_note_context_switch(void);
79extern void exit_rcu(void);
80int rcu_preempt_needs_cpu(void);
80 81
81static inline void synchronize_rcu(void) 82static inline int rcu_needs_cpu(int cpu)
82{ 83{
83 synchronize_sched(); 84 return rcu_preempt_needs_cpu();
84} 85}
85 86
86static inline void synchronize_rcu_bh(void) 87#endif /* #else #ifdef CONFIG_TINY_RCU */
88
89static inline void rcu_note_context_switch(int cpu)
87{ 90{
88 synchronize_sched(); 91 rcu_sched_qs(cpu);
92 rcu_preempt_note_context_switch();
89} 93}
90 94
91static inline void synchronize_rcu_expedited(void) 95/*
96 * Return the number of grace periods.
97 */
98static inline long rcu_batches_completed(void)
92{ 99{
93 synchronize_sched(); 100 return 0;
94} 101}
95 102
96static inline void synchronize_rcu_bh_expedited(void) 103/*
104 * Return the number of bottom-half grace periods.
105 */
106static inline long rcu_batches_completed_bh(void)
97{ 107{
98 synchronize_sched(); 108 return 0;
99} 109}
100 110
101struct notifier_block; 111static inline void rcu_force_quiescent_state(void)
102
103#ifdef CONFIG_NO_HZ
104
105extern void rcu_enter_nohz(void);
106extern void rcu_exit_nohz(void);
107
108#else /* #ifdef CONFIG_NO_HZ */
109
110static inline void rcu_enter_nohz(void)
111{ 112{
112} 113}
113 114
114static inline void rcu_exit_nohz(void) 115static inline void rcu_bh_force_quiescent_state(void)
115{ 116{
116} 117}
117 118
118#endif /* #else #ifdef CONFIG_NO_HZ */ 119static inline void rcu_sched_force_quiescent_state(void)
119
120static inline void exit_rcu(void)
121{ 120{
122} 121}
123 122
124static inline int rcu_preempt_depth(void) 123static inline void rcu_cpu_stall_reset(void)
125{ 124{
126 return 0;
127} 125}
128 126
129#ifdef CONFIG_DEBUG_LOCK_ALLOC 127#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c0ed1c056f2..95518e62879 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,64 +30,23 @@
30#ifndef __LINUX_RCUTREE_H 30#ifndef __LINUX_RCUTREE_H
31#define __LINUX_RCUTREE_H 31#define __LINUX_RCUTREE_H
32 32
33struct notifier_block;
34
35extern void rcu_sched_qs(int cpu);
36extern void rcu_bh_qs(int cpu);
37extern void rcu_note_context_switch(int cpu); 33extern void rcu_note_context_switch(int cpu);
38extern int rcu_needs_cpu(int cpu); 34extern int rcu_needs_cpu(int cpu);
35extern void rcu_cpu_stall_reset(void);
39 36
40#ifdef CONFIG_TREE_PREEMPT_RCU 37#ifdef CONFIG_TREE_PREEMPT_RCU
41 38
42extern void __rcu_read_lock(void);
43extern void __rcu_read_unlock(void);
44extern void synchronize_rcu(void);
45extern void exit_rcu(void); 39extern void exit_rcu(void);
46 40
47/*
48 * Defined as macro as it is a very low level header
49 * included from areas that don't even know about current
50 */
51#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
52
53#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 41#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
54 42
55static inline void __rcu_read_lock(void)
56{
57 preempt_disable();
58}
59
60static inline void __rcu_read_unlock(void)
61{
62 preempt_enable();
63}
64
65#define synchronize_rcu synchronize_sched
66
67static inline void exit_rcu(void) 43static inline void exit_rcu(void)
68{ 44{
69} 45}
70 46
71static inline int rcu_preempt_depth(void)
72{
73 return 0;
74}
75
76#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 47#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
77 48
78static inline void __rcu_read_lock_bh(void)
79{
80 local_bh_disable();
81}
82static inline void __rcu_read_unlock_bh(void)
83{
84 local_bh_enable();
85}
86
87extern void call_rcu_sched(struct rcu_head *head,
88 void (*func)(struct rcu_head *rcu));
89extern void synchronize_rcu_bh(void); 49extern void synchronize_rcu_bh(void);
90extern void synchronize_sched(void);
91extern void synchronize_rcu_expedited(void); 50extern void synchronize_rcu_expedited(void);
92 51
93static inline void synchronize_rcu_bh_expedited(void) 52static inline void synchronize_rcu_bh_expedited(void)
@@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void)
95 synchronize_sched_expedited(); 54 synchronize_sched_expedited();
96} 55}
97 56
98extern void rcu_check_callbacks(int cpu, int user); 57extern void rcu_barrier(void);
99 58
100extern long rcu_batches_completed(void); 59extern long rcu_batches_completed(void);
101extern long rcu_batches_completed_bh(void); 60extern long rcu_batches_completed_bh(void);
@@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void);
104extern void rcu_bh_force_quiescent_state(void); 63extern void rcu_bh_force_quiescent_state(void);
105extern void rcu_sched_force_quiescent_state(void); 64extern void rcu_sched_force_quiescent_state(void);
106 65
107#ifdef CONFIG_NO_HZ
108void rcu_enter_nohz(void);
109void rcu_exit_nohz(void);
110#else /* CONFIG_NO_HZ */
111static inline void rcu_enter_nohz(void)
112{
113}
114static inline void rcu_exit_nohz(void)
115{
116}
117#endif /* CONFIG_NO_HZ */
118
119/* A context switch is a grace period for RCU-sched and RCU-bh. */ 66/* A context switch is a grace period for RCU-sched and RCU-bh. */
120static inline int rcu_blocking_is_gp(void) 67static inline int rcu_blocking_is_gp(void)
121{ 68{
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db2d7d..e18473f0eb7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,11 +1202,13 @@ struct task_struct {
1202 unsigned int policy; 1202 unsigned int policy;
1203 cpumask_t cpus_allowed; 1203 cpumask_t cpus_allowed;
1204 1204
1205#ifdef CONFIG_TREE_PREEMPT_RCU 1205#ifdef CONFIG_PREEMPT_RCU
1206 int rcu_read_lock_nesting; 1206 int rcu_read_lock_nesting;
1207 char rcu_read_unlock_special; 1207 char rcu_read_unlock_special;
1208 struct rcu_node *rcu_blocked_node;
1209 struct list_head rcu_node_entry; 1208 struct list_head rcu_node_entry;
1209#endif /* #ifdef CONFIG_PREEMPT_RCU */
1210#ifdef CONFIG_TREE_PREEMPT_RCU
1211 struct rcu_node *rcu_blocked_node;
1210#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1212#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1211 1213
1212#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1214#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -1288,9 +1290,9 @@ struct task_struct {
1288 struct list_head cpu_timers[3]; 1290 struct list_head cpu_timers[3];
1289 1291
1290/* process credentials */ 1292/* process credentials */
1291 const struct cred *real_cred; /* objective and real subjective task 1293 const struct cred __rcu *real_cred; /* objective and real subjective task
1292 * credentials (COW) */ 1294 * credentials (COW) */
1293 const struct cred *cred; /* effective (overridable) subjective task 1295 const struct cred __rcu *cred; /* effective (overridable) subjective task
1294 * credentials (COW) */ 1296 * credentials (COW) */
1295 struct mutex cred_guard_mutex; /* guard against foreign influences on 1297 struct mutex cred_guard_mutex; /* guard against foreign influences on
1296 * credential calculations 1298 * credential calculations
@@ -1418,7 +1420,7 @@ struct task_struct {
1418#endif 1420#endif
1419#ifdef CONFIG_CGROUPS 1421#ifdef CONFIG_CGROUPS
1420 /* Control Group info protected by css_set_lock */ 1422 /* Control Group info protected by css_set_lock */
1421 struct css_set *cgroups; 1423 struct css_set __rcu *cgroups;
1422 /* cg_list protected by css_set_lock and tsk->alloc_lock */ 1424 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1423 struct list_head cg_list; 1425 struct list_head cg_list;
1424#endif 1426#endif
@@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1740#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) 1742#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1741#define used_math() tsk_used_math(current) 1743#define used_math() tsk_used_math(current)
1742 1744
1743#ifdef CONFIG_TREE_PREEMPT_RCU 1745#ifdef CONFIG_PREEMPT_RCU
1744 1746
1745#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1747#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1746#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1748#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
@@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p)
1749{ 1751{
1750 p->rcu_read_lock_nesting = 0; 1752 p->rcu_read_lock_nesting = 0;
1751 p->rcu_read_unlock_special = 0; 1753 p->rcu_read_unlock_special = 0;
1754#ifdef CONFIG_TREE_PREEMPT_RCU
1752 p->rcu_blocked_node = NULL; 1755 p->rcu_blocked_node = NULL;
1756#endif
1753 INIT_LIST_HEAD(&p->rcu_node_entry); 1757 INIT_LIST_HEAD(&p->rcu_node_entry);
1754} 1758}
1755 1759
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4d5d2f546db..58971e891f4 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
108#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 108#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
109 109
110/** 110/**
111 * srcu_dereference - fetch SRCU-protected pointer with checking 111 * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
112 * @p: the pointer to fetch and protect for later dereferencing
113 * @sp: pointer to the srcu_struct, which is used to check that we
114 * really are in an SRCU read-side critical section.
115 * @c: condition to check for update-side use
112 * 116 *
113 * Makes rcu_dereference_check() do the dirty work. 117 * If PROVE_RCU is enabled, invoking this outside of an RCU read-side
118 * critical section will result in an RCU-lockdep splat, unless @c evaluates
119 * to 1. The @c argument will normally be a logical expression containing
120 * lockdep_is_held() calls.
114 */ 121 */
115#define srcu_dereference(p, sp) \ 122#define srcu_dereference_check(p, sp, c) \
116 rcu_dereference_check(p, srcu_read_lock_held(sp)) 123 __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
124
125/**
126 * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
127 * @p: the pointer to fetch and protect for later dereferencing
128 * @sp: pointer to the srcu_struct, which is used to check that we
129 * really are in an SRCU read-side critical section.
130 *
131 * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU
132 * is enabled, invoking this outside of an RCU read-side critical
133 * section will result in an RCU-lockdep splat.
134 */
135#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
117 136
118/** 137/**
119 * srcu_read_lock - register a new reader for an SRCU-protected structure. 138 * srcu_read_lock - register a new reader for an SRCU-protected structure.
120 * @sp: srcu_struct in which to register the new reader. 139 * @sp: srcu_struct in which to register the new reader.
121 * 140 *
122 * Enter an SRCU read-side critical section. Note that SRCU read-side 141 * Enter an SRCU read-side critical section. Note that SRCU read-side
123 * critical sections may be nested. 142 * critical sections may be nested. However, it is illegal to
143 * call anything that waits on an SRCU grace period for the same
144 * srcu_struct, whether directly or indirectly. Please note that
145 * one way to indirectly wait on an SRCU grace period is to acquire
146 * a mutex that is held elsewhere while calling synchronize_srcu() or
147 * synchronize_srcu_expedited().
124 */ 148 */
125static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) 149static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
126{ 150{
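An SRCU usage sketch (my_srcu and cur_srcu_cfg are made up; init_srcu_struct(&my_srcu) is assumed to have run at init time). The index returned by srcu_read_lock() must be passed back to srcu_read_unlock(), and per the comment above, nothing inside the section may wait, directly or indirectly, on an SRCU grace period for my_srcu:

	#include <linux/srcu.h>

	struct srcu_cfg {
		int val;
	};
	static struct srcu_struct my_srcu;
	static struct srcu_cfg __rcu *cur_srcu_cfg;

	static int read_srcu_cfg(void)
	{
		struct srcu_cfg *c;
		int idx, val = -1;

		idx = srcu_read_lock(&my_srcu);
		c = srcu_dereference(cur_srcu_cfg, &my_srcu);
		if (c)
			val = c->val;
		srcu_read_unlock(&my_srcu, idx);
		return val;
	}
	/* Updater: rcu_assign_pointer(cur_srcu_cfg, new); synchronize_srcu(&my_srcu); kfree(old); */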
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 671538d25bc..8eee9dbbfe7 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -69,7 +69,7 @@ struct gss_cl_ctx {
69 enum rpc_gss_proc gc_proc; 69 enum rpc_gss_proc gc_proc;
70 u32 gc_seq; 70 u32 gc_seq;
71 spinlock_t gc_seq_lock; 71 spinlock_t gc_seq_lock;
72 struct gss_ctx *gc_gss_ctx; 72 struct gss_ctx __rcu *gc_gss_ctx;
73 struct xdr_netobj gc_wire_ctx; 73 struct xdr_netobj gc_wire_ctx;
74 u32 gc_win; 74 u32 gc_win;
75 unsigned long gc_expiry; 75 unsigned long gc_expiry;
@@ -80,7 +80,7 @@ struct gss_upcall_msg;
80struct gss_cred { 80struct gss_cred {
81 struct rpc_cred gc_base; 81 struct rpc_cred gc_base;
82 enum rpc_gss_svc gc_service; 82 enum rpc_gss_svc gc_service;
83 struct gss_cl_ctx *gc_ctx; 83 struct gss_cl_ctx __rcu *gc_ctx;
84 struct gss_upcall_msg *gc_upcall; 84 struct gss_upcall_msg *gc_upcall;
85 unsigned long gc_upcall_timestamp; 85 unsigned long gc_upcall_timestamp;
86 unsigned char gc_machine_cred : 1; 86 unsigned char gc_machine_cred : 1;
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 726cc353640..dd1fdb8293f 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p)
45 return 0; 45 return 0;
46 46
47 rcu_read_lock(); 47 rcu_read_lock();
48 id = rcu_dereference(net_cls_subsys_id); 48 id = rcu_dereference_index_check(net_cls_subsys_id,
49 rcu_read_lock_held());
49 if (id >= 0) 50 if (id >= 0)
50 classid = container_of(task_subsys_state(p, id), 51 classid = container_of(task_subsys_state(p, id),
51 struct cgroup_cls_state, css)->classid; 52 struct cgroup_cls_state, css)->classid;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e624dae54fa..caf17db87db 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -75,7 +75,7 @@ struct nf_conntrack_helper;
75/* nf_conn feature for connections that have a helper */ 75/* nf_conn feature for connections that have a helper */
76struct nf_conn_help { 76struct nf_conn_help {
77 /* Helper. if any */ 77 /* Helper. if any */
78 struct nf_conntrack_helper *helper; 78 struct nf_conntrack_helper __rcu *helper;
79 79
80 union nf_conntrack_help help; 80 union nf_conntrack_help help;
81 81
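The __rcu annotations added throughout this patch (gc_gss_ctx, gc_ctx, helper, real_cred, cred, cgroups, css, ...) let sparse flag direct dereferences of RCU-protected pointers. A minimal sketch of the pattern, with made-up names:

	struct helper;				/* opaque */
	struct conn {
		struct helper __rcu *helper;	/* must go through rcu_dereference() and friends */
	};

	static struct helper *conn_helper(struct conn *c)
	{
		/* A plain "return c->helper;" would now be flagged when building with sparse. */
		return rcu_dereference(c->helper);	/* caller must be in an RCU read-side section */
	}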
diff --git a/init/Kconfig b/init/Kconfig
index 2de5b1cbadd..a619a1ac7f4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -340,6 +340,7 @@ choice
340 340
341config TREE_RCU 341config TREE_RCU
342 bool "Tree-based hierarchical RCU" 342 bool "Tree-based hierarchical RCU"
343 depends on !PREEMPT && SMP
343 help 344 help
344 This option selects the RCU implementation that is 345 This option selects the RCU implementation that is
345 designed for very large SMP system with hundreds or 346 designed for very large SMP system with hundreds or
@@ -347,7 +348,7 @@ config TREE_RCU
347 smaller systems. 348 smaller systems.
348 349
349config TREE_PREEMPT_RCU 350config TREE_PREEMPT_RCU
350 bool "Preemptable tree-based hierarchical RCU" 351 bool "Preemptible tree-based hierarchical RCU"
351 depends on PREEMPT 352 depends on PREEMPT
352 help 353 help
353 This option selects the RCU implementation that is 354 This option selects the RCU implementation that is
@@ -365,8 +366,22 @@ config TINY_RCU
365 is not required. This option greatly reduces the 366 is not required. This option greatly reduces the
366 memory footprint of RCU. 367 memory footprint of RCU.
367 368
369config TINY_PREEMPT_RCU
370 bool "Preemptible UP-only small-memory-footprint RCU"
371 depends on !SMP && PREEMPT
372 help
373 This option selects the RCU implementation that is designed
374 for real-time UP systems. This option greatly reduces the
375 memory footprint of RCU.
376
368endchoice 377endchoice
369 378
379config PREEMPT_RCU
380 def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
381 help
382 This option enables preemptible-RCU code that is common between
383 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
384
370config RCU_TRACE 385config RCU_TRACE
371 bool "Enable tracing for RCU" 386 bool "Enable tracing for RCU"
372 depends on TREE_RCU || TREE_PREEMPT_RCU 387 depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -387,9 +402,12 @@ config RCU_FANOUT
387 help 402 help
388 This option controls the fanout of hierarchical implementations 403 This option controls the fanout of hierarchical implementations
389 of RCU, allowing RCU to work efficiently on machines with 404 of RCU, allowing RCU to work efficiently on machines with
390 large numbers of CPUs. This value must be at least the cube 405 large numbers of CPUs. This value must be at least the fourth
391 root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit 406 root of NR_CPUS, which allows NR_CPUS to be insanely large.
392 systems and up to 262,144 for 64-bit systems. 407 The default value of RCU_FANOUT should be used for production
408 systems, but if you are stress-testing the RCU implementation
409 itself, small RCU_FANOUT values allow you to test large-system
410 code paths on small(er) systems.
393 411
394 Select a specific number if testing RCU itself. 412 Select a specific number if testing RCU itself.
395 Take the default if unsure. 413 Take the default if unsure.
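For scale (numbers not in the patch, assuming the then-default RCU_FANOUT of 32 on 32-bit and 64 on 64-bit): a four-level rcu_node tree covers up to 32^4 = 1,048,576 CPUs on 32-bit and 64^4 = 16,777,216 CPUs on 64-bit, which is why the reworded help text no longer quotes the old cube-root limits.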
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a74be..17046b6e7c9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o
86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o 86obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o 87obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
88obj-$(CONFIG_TINY_RCU) += rcutiny.o 88obj-$(CONFIG_TINY_RCU) += rcutiny.o
89obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
89obj-$(CONFIG_RELAY) += relay.o 90obj-$(CONFIG_RELAY) += relay.o
90obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 91obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
91obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 92obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 192f88c5b0f..e5c5497a7dc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -138,7 +138,7 @@ struct css_id {
138 * is called after synchronize_rcu(). But for safe use, css_is_removed() 138 * is called after synchronize_rcu(). But for safe use, css_is_removed()
139 * css_tryget() should be used for avoiding race. 139 * css_tryget() should be used for avoiding race.
140 */ 140 */
141 struct cgroup_subsys_state *css; 141 struct cgroup_subsys_state __rcu *css;
142 /* 142 /*
143 * ID of this css. 143 * ID of this css.
144 */ 144 */
diff --git a/kernel/pid.c b/kernel/pid.c
index d55c6fb8d08..39b65b69584 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
401 struct task_struct *result = NULL; 401 struct task_struct *result = NULL;
402 if (pid) { 402 if (pid) {
403 struct hlist_node *first; 403 struct hlist_node *first;
404 first = rcu_dereference_check(pid->tasks[type].first, 404 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
405 rcu_read_lock_held() || 405 rcu_read_lock_held() ||
406 lockdep_tasklist_lock_is_held()); 406 lockdep_tasklist_lock_is_held());
407 if (first) 407 if (first)
@@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task);
416 */ 416 */
417struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) 417struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
418{ 418{
419 rcu_lockdep_assert(rcu_read_lock_held());
419 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); 420 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
420} 421}
421 422
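With the new rcu_lockdep_assert(), callers of find_task_by_pid_ns() must look roughly like the sketch below (lookup_task() is a made-up wrapper); under CONFIG_PROVE_RCU, a call made outside an RCU read-side critical section now produces a lockdep-RCU splat:

	#include <linux/sched.h>

	static struct task_struct *lookup_task(pid_t nr, struct pid_namespace *ns)
	{
		struct task_struct *t;

		rcu_read_lock();
		t = find_task_by_pid_ns(nr, ns);
		if (t)
			get_task_struct(t);	/* pin before leaving the read-side section */
		rcu_read_unlock();
		return t;
	}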
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 4d169835fb3..6c79e851521 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void)
73EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 73EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
74 74
75/** 75/**
76 * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? 76 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
77 * 77 *
78 * Check for bottom half being disabled, which covers both the 78 * Check for bottom half being disabled, which covers both the
79 * CONFIG_PROVE_RCU and not cases. Note that if someone uses 79 * CONFIG_PROVE_RCU and not cases. Note that if someone uses
80 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) 80 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
81 * will show the situation. 81 * will show the situation. This is useful for debug checks in functions
82 * that require that they be called within an RCU read-side critical
83 * section.
82 * 84 *
83 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. 85 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
84 */ 86 */
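A sketch of the debug-check use mentioned above (cur_cfg and get_cur_cfg() are made up): a helper that must be called with BH disabled can assert that requirement directly:

	struct cfg;
	static struct cfg __rcu *cur_cfg;

	/* Caller must hold rcu_read_lock_bh() or otherwise have BH disabled. */
	static struct cfg *get_cur_cfg(void)
	{
		WARN_ON_ONCE(!rcu_read_lock_bh_held());
		return rcu_dereference_bh(cur_cfg);
	}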
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 196ec02f8be..d806735342a 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly;
59EXPORT_SYMBOL_GPL(rcu_scheduler_active); 59EXPORT_SYMBOL_GPL(rcu_scheduler_active);
60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
61 61
62/* Forward declarations for rcutiny_plugin.h. */
63static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
64static void __call_rcu(struct rcu_head *head,
65 void (*func)(struct rcu_head *rcu),
66 struct rcu_ctrlblk *rcp);
67
68#include "rcutiny_plugin.h"
69
62#ifdef CONFIG_NO_HZ 70#ifdef CONFIG_NO_HZ
63 71
64static long rcu_dynticks_nesting = 1; 72static long rcu_dynticks_nesting = 1;
@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user)
140 rcu_sched_qs(cpu); 148 rcu_sched_qs(cpu);
141 else if (!in_softirq()) 149 else if (!in_softirq())
142 rcu_bh_qs(cpu); 150 rcu_bh_qs(cpu);
151 rcu_preempt_check_callbacks();
143} 152}
144 153
145/* 154/*
@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
162 *rcp->donetail = NULL; 171 *rcp->donetail = NULL;
163 if (rcp->curtail == rcp->donetail) 172 if (rcp->curtail == rcp->donetail)
164 rcp->curtail = &rcp->rcucblist; 173 rcp->curtail = &rcp->rcucblist;
174 rcu_preempt_remove_callbacks(rcp);
165 rcp->donetail = &rcp->rcucblist; 175 rcp->donetail = &rcp->rcucblist;
166 local_irq_restore(flags); 176 local_irq_restore(flags);
167 177
@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
182{ 192{
183 __rcu_process_callbacks(&rcu_sched_ctrlblk); 193 __rcu_process_callbacks(&rcu_sched_ctrlblk);
184 __rcu_process_callbacks(&rcu_bh_ctrlblk); 194 __rcu_process_callbacks(&rcu_bh_ctrlblk);
195 rcu_preempt_process_callbacks();
185} 196}
186 197
187/* 198/*
@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head,
223} 234}
224 235
225/* 236/*
226 * Post an RCU callback to be invoked after the end of an RCU grace 237 * Post an RCU callback to be invoked after the end of an RCU-sched grace
227 * period. But since we have but one CPU, that would be after any 238 * period. But since we have but one CPU, that would be after any
228 * quiescent state. 239 * quiescent state.
229 */ 240 */
230void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 241void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
231{ 242{
232 __call_rcu(head, func, &rcu_sched_ctrlblk); 243 __call_rcu(head, func, &rcu_sched_ctrlblk);
233} 244}
234EXPORT_SYMBOL_GPL(call_rcu); 245EXPORT_SYMBOL_GPL(call_rcu_sched);
235 246
236/* 247/*
237 * Post an RCU bottom-half callback to be invoked after any subsequent 248 * Post an RCU bottom-half callback to be invoked after any subsequent
@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
243} 254}
244EXPORT_SYMBOL_GPL(call_rcu_bh); 255EXPORT_SYMBOL_GPL(call_rcu_bh);
245 256
246void rcu_barrier(void)
247{
248 struct rcu_synchronize rcu;
249
250 init_rcu_head_on_stack(&rcu.head);
251 init_completion(&rcu.completion);
252 /* Will wake me after RCU finished. */
253 call_rcu(&rcu.head, wakeme_after_rcu);
254 /* Wait for it. */
255 wait_for_completion(&rcu.completion);
256 destroy_rcu_head_on_stack(&rcu.head);
257}
258EXPORT_SYMBOL_GPL(rcu_barrier);
259
260void rcu_barrier_bh(void) 257void rcu_barrier_bh(void)
261{ 258{
262 struct rcu_synchronize rcu; 259 struct rcu_synchronize rcu;
@@ -289,5 +286,3 @@ void __init rcu_init(void)
289{ 286{
290 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 287 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291} 288}
292
293#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index d223a92bc74..c5bea1137dc 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -17,11 +17,583 @@
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright IBM Corporation, 2009 20 * Copyright (c) 2010 Linaro
21 * 21 *
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25#ifdef CONFIG_TINY_PREEMPT_RCU
26
27#include <linux/delay.h>
28
29/* Global control variables for preemptible RCU. */
30struct rcu_preempt_ctrlblk {
31 struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
32 struct rcu_head **nexttail;
33 /* Tasks blocked in a preemptible RCU */
34 /* read-side critical section while an */
35 /* preemptible-RCU grace period is in */
36 /* progress must wait for a later grace */
37 /* period. This pointer points to the */
38 /* ->next pointer of the last task that */
39 /* must wait for a later grace period, or */
40 /* to &->rcb.rcucblist if there is no */
41 /* such task. */
42 struct list_head blkd_tasks;
43 /* Tasks blocked in RCU read-side critical */
44 /* section. Tasks are placed at the head */
45 /* of this list and age towards the tail. */
46 struct list_head *gp_tasks;
47 /* Pointer to the first task blocking the */
48 /* current grace period, or NULL if there */
 49 /* is no such task. */
50 struct list_head *exp_tasks;
51 /* Pointer to first task blocking the */
52 /* current expedited grace period, or NULL */
53 /* if there is no such task. If there */
54 /* is no current expedited grace period, */
55 /* then there cannot be any such task. */
56 u8 gpnum; /* Current grace period. */
57 u8 gpcpu; /* Last grace period blocked by the CPU. */
58 u8 completed; /* Last grace period completed. */
59 /* If all three are equal, RCU is idle. */
60};
61
62static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
63 .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
64 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
65 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
66 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
67};
68
69static int rcu_preempted_readers_exp(void);
70static void rcu_report_exp_done(void);
71
72/*
73 * Return true if the CPU has not yet responded to the current grace period.
74 */
75static int rcu_cpu_cur_gp(void)
76{
77 return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
78}
79
80/*
81 * Check for a running RCU reader. Because there is only one CPU,
82 * there can be but one running RCU reader at a time. ;-)
83 */
84static int rcu_preempt_running_reader(void)
85{
86 return current->rcu_read_lock_nesting;
87}
88
89/*
90 * Check for preempted RCU readers blocking any grace period.
91 * If the caller needs a reliable answer, it must disable hard irqs.
92 */
93static int rcu_preempt_blocked_readers_any(void)
94{
95 return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
96}
97
98/*
99 * Check for preempted RCU readers blocking the current grace period.
100 * If the caller needs a reliable answer, it must disable hard irqs.
101 */
102static int rcu_preempt_blocked_readers_cgp(void)
103{
104 return rcu_preempt_ctrlblk.gp_tasks != NULL;
105}
106
107/*
108 * Return true if another preemptible-RCU grace period is needed.
109 */
110static int rcu_preempt_needs_another_gp(void)
111{
112 return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
113}
114
115/*
116 * Return true if a preemptible-RCU grace period is in progress.
117 * The caller must disable hardirqs.
118 */
119static int rcu_preempt_gp_in_progress(void)
120{
121 return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
122}
123
124/*
125 * Record a preemptible-RCU quiescent state for the specified CPU. Note
126 * that this just means that the task currently running on the CPU is
127 * in a quiescent state. There might be any number of tasks blocked
128 * while in an RCU read-side critical section.
129 *
130 * Unlike the other rcu_*_qs() functions, callers to this function
131 * must disable irqs in order to protect the assignment to
132 * ->rcu_read_unlock_special.
133 *
134 * Because this is a single-CPU implementation, the only way a grace
135 * period can end is if the CPU is in a quiescent state. The reason is
136 * that a blocked preemptible-RCU reader can exit its critical section
137 * only if the CPU is running it at the time. Therefore, when the
138 * last task blocking the current grace period exits its RCU read-side
139 * critical section, neither the CPU nor blocked tasks will be stopping
140 * the current grace period. (In contrast, SMP implementations
141 * might have CPUs running in RCU read-side critical sections that
142 * block later grace periods -- but this is not possible given only
143 * one CPU.)
144 */
145static void rcu_preempt_cpu_qs(void)
146{
147 /* Record both CPU and task as having responded to current GP. */
148 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
149 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
150
151 /*
152 * If there is no GP, or if blocked readers are still blocking GP,
153 * then there is nothing more to do.
154 */
155 if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
156 return;
157
158 /* Advance callbacks. */
159 rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
160 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
161 rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
162
163 /* If there are no blocked readers, next GP is done instantly. */
164 if (!rcu_preempt_blocked_readers_any())
165 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
166
167 /* If there are done callbacks, make RCU_SOFTIRQ process them. */
168 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
169 raise_softirq(RCU_SOFTIRQ);
170}
171
172/*
173 * Start a new RCU grace period if warranted. Hard irqs must be disabled.
174 */
175static void rcu_preempt_start_gp(void)
176{
177 if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
178
179 /* Official start of GP. */
180 rcu_preempt_ctrlblk.gpnum++;
181
182 /* Any blocked RCU readers block new GP. */
183 if (rcu_preempt_blocked_readers_any())
184 rcu_preempt_ctrlblk.gp_tasks =
185 rcu_preempt_ctrlblk.blkd_tasks.next;
186
187 /* If there is no running reader, CPU is done with GP. */
188 if (!rcu_preempt_running_reader())
189 rcu_preempt_cpu_qs();
190 }
191}
192
193/*
194 * We have entered the scheduler, and the current task might soon be
195 * context-switched away from. If this task is in an RCU read-side
196 * critical section, we will no longer be able to rely on the CPU to
197 * record that fact, so we enqueue the task on the blkd_tasks list.
198 * If the task started after the current grace period began, as recorded
199 * by ->gpcpu, we enqueue at the beginning of the list. Otherwise
200 * before the element referenced by ->gp_tasks (or at the tail if
201 * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
202 * The task will dequeue itself when it exits the outermost enclosing
203 * RCU read-side critical section. Therefore, the current grace period
204 * cannot be permitted to complete until the ->gp_tasks pointer becomes
205 * NULL.
206 *
207 * Caller must disable preemption.
208 */
209void rcu_preempt_note_context_switch(void)
210{
211 struct task_struct *t = current;
212 unsigned long flags;
213
214 local_irq_save(flags); /* must exclude scheduler_tick(). */
215 if (rcu_preempt_running_reader() &&
216 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
217
218 /* Possibly blocking in an RCU read-side critical section. */
219 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
220
221 /*
222 * If this CPU has already checked in, then this task
223 * will hold up the next grace period rather than the
224 * current grace period. Queue the task accordingly.
225 * If the task is queued for the current grace period
226 * (i.e., this CPU has not yet passed through a quiescent
227 * state for the current grace period), then as long
228 * as that task remains queued, the current grace period
229 * cannot end.
230 */
231 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
232 if (rcu_cpu_cur_gp())
233 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
234 }
235
236 /*
237 * Either we were not in an RCU read-side critical section to
238 * begin with, or we have now recorded that critical section
239 * globally. Either way, we can now note a quiescent state
240 * for this CPU. Again, if we were in an RCU read-side critical
241 * section, and if that critical section was blocking the current
242 * grace period, then the fact that the task has been enqueued
243 * means that current grace period continues to be blocked.
244 */
245 rcu_preempt_cpu_qs();
246 local_irq_restore(flags);
247}
248
249/*
250 * Tiny-preemptible RCU implementation for rcu_read_lock().
251 * Just increment ->rcu_read_lock_nesting, shared state will be updated
252 * if we block.
253 */
254void __rcu_read_lock(void)
255{
256 current->rcu_read_lock_nesting++;
257 barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
258}
259EXPORT_SYMBOL_GPL(__rcu_read_lock);
260
261/*
262 * Handle special cases during rcu_read_unlock(), such as needing to
263 * notify RCU core processing or task having blocked during the RCU
264 * read-side critical section.
265 */
266static void rcu_read_unlock_special(struct task_struct *t)
267{
268 int empty;
269 int empty_exp;
270 unsigned long flags;
271 struct list_head *np;
272 int special;
273
274 /*
275 * NMI handlers cannot block and cannot safely manipulate state.
276 * They therefore cannot possibly be special, so just leave.
277 */
278 if (in_nmi())
279 return;
280
281 local_irq_save(flags);
282
283 /*
284 * If RCU core is waiting for this CPU to exit critical section,
285 * let it know that we have done so.
286 */
287 special = t->rcu_read_unlock_special;
288 if (special & RCU_READ_UNLOCK_NEED_QS)
289 rcu_preempt_cpu_qs();
290
291 /* Hardware IRQ handlers cannot block. */
292 if (in_irq()) {
293 local_irq_restore(flags);
294 return;
295 }
296
297 /* Clean up if blocked during RCU read-side critical section. */
298 if (special & RCU_READ_UNLOCK_BLOCKED) {
299 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
300
301 /*
302 * Remove this task from the ->blkd_tasks list and adjust
303 * any pointers that might have been referencing it.
304 */
305 empty = !rcu_preempt_blocked_readers_cgp();
306 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
307 np = t->rcu_node_entry.next;
308 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
309 np = NULL;
310 list_del(&t->rcu_node_entry);
311 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
312 rcu_preempt_ctrlblk.gp_tasks = np;
313 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
314 rcu_preempt_ctrlblk.exp_tasks = np;
315 INIT_LIST_HEAD(&t->rcu_node_entry);
316
317 /*
318 * If this was the last task on the current list, and if
319 * we aren't waiting on the CPU, report the quiescent state
320 * and start a new grace period if needed.
321 */
322 if (!empty && !rcu_preempt_blocked_readers_cgp()) {
323 rcu_preempt_cpu_qs();
324 rcu_preempt_start_gp();
325 }
326
327 /*
328 * If this was the last task on the expedited lists,
 329 * then we need to wake up the waiting task.
330 */
331 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
332 rcu_report_exp_done();
333 }
334 local_irq_restore(flags);
335}
336
337/*
338 * Tiny-preemptible RCU implementation for rcu_read_unlock().
339 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
340 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
341 * invoke rcu_read_unlock_special() to clean up after a context switch
342 * in an RCU read-side critical section and other special cases.
343 */
344void __rcu_read_unlock(void)
345{
346 struct task_struct *t = current;
347
348 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
349 --t->rcu_read_lock_nesting;
350 barrier(); /* decrement before load of ->rcu_read_unlock_special */
351 if (t->rcu_read_lock_nesting == 0 &&
352 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
353 rcu_read_unlock_special(t);
354#ifdef CONFIG_PROVE_LOCKING
355 WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
356#endif /* #ifdef CONFIG_PROVE_LOCKING */
357}
358EXPORT_SYMBOL_GPL(__rcu_read_unlock);
359
360/*
361 * Check for a quiescent state from the current CPU. When a task blocks,
362 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
363 * checked elsewhere. This is called from the scheduling-clock interrupt.
364 *
365 * Caller must disable hard irqs.
366 */
367static void rcu_preempt_check_callbacks(void)
368{
369 struct task_struct *t = current;
370
371 if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress())
372 rcu_preempt_cpu_qs();
373 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
374 rcu_preempt_ctrlblk.rcb.donetail)
375 raise_softirq(RCU_SOFTIRQ);
376 if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader())
377 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
378}
379
380/*
381 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
382 * update, so this is invoked from __rcu_process_callbacks() to
383 * handle that case. Of course, it is invoked for all flavors of
384 * RCU, but RCU callbacks can appear only on one of the lists, and
385 * neither ->nexttail nor ->donetail can possibly be NULL, so there
386 * is no need for an explicit check.
387 */
388static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
389{
390 if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
391 rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
392}
393
394/*
395 * Process callbacks for preemptible RCU.
396 */
397static void rcu_preempt_process_callbacks(void)
398{
399 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
400}
401
402/*
 403 * Queue a preemptible RCU callback for invocation after a grace period.
404 */
405void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
406{
407 unsigned long flags;
408
409 debug_rcu_head_queue(head);
410 head->func = func;
411 head->next = NULL;
412
413 local_irq_save(flags);
414 *rcu_preempt_ctrlblk.nexttail = head;
415 rcu_preempt_ctrlblk.nexttail = &head->next;
416 rcu_preempt_start_gp(); /* checks to see if GP needed. */
417 local_irq_restore(flags);
418}
419EXPORT_SYMBOL_GPL(call_rcu);
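
The call_rcu() above is the asynchronous half of the update-side API: the queued callback runs only after all pre-existing readers have finished. As a minimal usage sketch (not part of this patch; the structure, list, and lock names are hypothetical), an updater typically unlinks an element under its own lock and then hands the memory to call_rcu():

	struct foo {
		struct list_head list;
		struct rcu_head rcu;
		int data;
	};

	static void foo_reclaim(struct rcu_head *rcu)
	{
		struct foo *fp = container_of(rcu, struct foo, rcu);

		kfree(fp);	/* safe: all pre-existing readers are done */
	}

	static void foo_remove(struct foo *fp)
	{
		spin_lock(&foo_lock);		/* hypothetical update-side lock */
		list_del_rcu(&fp->list);
		spin_unlock(&foo_lock);
		call_rcu(&fp->rcu, foo_reclaim);	/* free after a grace period */
	}
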
420
421void rcu_barrier(void)
422{
423 struct rcu_synchronize rcu;
424
425 init_rcu_head_on_stack(&rcu.head);
426 init_completion(&rcu.completion);
427 /* Will wake me after RCU finished. */
428 call_rcu(&rcu.head, wakeme_after_rcu);
429 /* Wait for it. */
430 wait_for_completion(&rcu.completion);
431 destroy_rcu_head_on_stack(&rcu.head);
432}
433EXPORT_SYMBOL_GPL(rcu_barrier);
434
435/*
436 * synchronize_rcu - wait until a grace period has elapsed.
437 *
438 * Control will return to the caller some time after a full grace
439 * period has elapsed, in other words after all currently executing RCU
440 * read-side critical sections have completed. RCU read-side critical
441 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
442 * and may be nested.
443 */
444void synchronize_rcu(void)
445{
446#ifdef CONFIG_DEBUG_LOCK_ALLOC
447 if (!rcu_scheduler_active)
448 return;
449#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
450
451 WARN_ON_ONCE(rcu_preempt_running_reader());
452 if (!rcu_preempt_blocked_readers_any())
453 return;
454
455 /* Once we get past the fastpath checks, same code as rcu_barrier(). */
456 rcu_barrier();
457}
458EXPORT_SYMBOL_GPL(synchronize_rcu);
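
For updaters that can afford to block, the same removal can be done synchronously with the synchronize_rcu() shown above; a hedged sketch using the same hypothetical structure as in the earlier call_rcu() example:

	static void foo_remove_sync(struct foo *fp)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&fp->list);
		spin_unlock(&foo_lock);
		synchronize_rcu();	/* wait for all pre-existing readers */
		kfree(fp);		/* no reader can still reference fp */
	}
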
459
460static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
461static unsigned long sync_rcu_preempt_exp_count;
462static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
463
464/*
465 * Return non-zero if there are any tasks in RCU read-side critical
466 * sections blocking the current preemptible-RCU expedited grace period.
467 * If there is no preemptible-RCU expedited grace period currently in
468 * progress, returns zero unconditionally.
469 */
470static int rcu_preempted_readers_exp(void)
471{
472 return rcu_preempt_ctrlblk.exp_tasks != NULL;
473}
474
475/*
476 * Report the exit from RCU read-side critical section for the last task
477 * that queued itself during or before the current expedited preemptible-RCU
478 * grace period.
479 */
480static void rcu_report_exp_done(void)
481{
482 wake_up(&sync_rcu_preempt_exp_wq);
483}
484
485/*
486 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
 487 * is to rely on the fact that there is but one CPU, and that it is
488 * illegal for a task to invoke synchronize_rcu_expedited() while in a
489 * preemptible-RCU read-side critical section. Therefore, any such
490 * critical sections must correspond to blocked tasks, which must therefore
491 * be on the ->blkd_tasks list. So just record the current head of the
492 * list in the ->exp_tasks pointer, and wait for all tasks including and
493 * after the task pointed to by ->exp_tasks to drain.
494 */
495void synchronize_rcu_expedited(void)
496{
497 unsigned long flags;
498 struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
499 unsigned long snap;
500
501 barrier(); /* ensure prior action seen before grace period. */
502
503 WARN_ON_ONCE(rcu_preempt_running_reader());
504
505 /*
506 * Acquire lock so that there is only one preemptible RCU grace
507 * period in flight. Of course, if someone does the expedited
508 * grace period for us while we are acquiring the lock, just leave.
509 */
510 snap = sync_rcu_preempt_exp_count + 1;
511 mutex_lock(&sync_rcu_preempt_exp_mutex);
512 if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
513 goto unlock_mb_ret; /* Others did our work for us. */
514
515 local_irq_save(flags);
516
517 /*
 518 * All RCU readers must already be on ->blkd_tasks because
519 * we cannot legally be executing in an RCU read-side critical
520 * section.
521 */
522
523 /* Snapshot current head of ->blkd_tasks list. */
524 rpcp->exp_tasks = rpcp->blkd_tasks.next;
525 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
526 rpcp->exp_tasks = NULL;
527 local_irq_restore(flags);
528
529 /* Wait for tail of ->blkd_tasks list to drain. */
530 if (rcu_preempted_readers_exp())
531 wait_event(sync_rcu_preempt_exp_wq,
532 !rcu_preempted_readers_exp());
533
534 /* Clean up and exit. */
535 barrier(); /* ensure expedited GP seen before counter increment. */
536 sync_rcu_preempt_exp_count++;
537unlock_mb_ret:
538 mutex_unlock(&sync_rcu_preempt_exp_mutex);
539 barrier(); /* ensure subsequent action seen after grace period. */
540}
541EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
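
The snapshot handshake above (snap taken before the mutex, then ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count) checked after acquiring it) relies on the comparison being safe across counter wraparound. A small illustration of the macro, which this series defines as (ULONG_MAX / 2 < (a) - (b)); the values below are illustrative only:

	/* Illustration only: "a" is modularly before "b" even across wrap. */
	unsigned long snap  = ULONG_MAX;	/* snapshot taken just before wrap */
	unsigned long count = 1UL;		/* counter has since wrapped */

	/* (snap - count) == ULONG_MAX - 1 > ULONG_MAX / 2, so this holds: */
	BUG_ON(!ULONG_CMP_LT(snap, count));
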
542
543/*
544 * Does preemptible RCU need the CPU to stay out of dynticks mode?
545 */
546int rcu_preempt_needs_cpu(void)
547{
548 if (!rcu_preempt_running_reader())
549 rcu_preempt_cpu_qs();
550 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
551}
552
553/*
 554 * Check for a task exiting while in a preemptible RCU read-side
 555 * critical section; clean up if so. No need to issue warnings,
556 * as debug_check_no_locks_held() already does this if lockdep
557 * is enabled.
558 */
559void exit_rcu(void)
560{
561 struct task_struct *t = current;
562
563 if (t->rcu_read_lock_nesting == 0)
564 return;
565 t->rcu_read_lock_nesting = 1;
566 rcu_read_unlock();
567}
568
569#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
570
571/*
572 * Because preemptible RCU does not exist, it never has any callbacks
573 * to check.
574 */
575static void rcu_preempt_check_callbacks(void)
576{
577}
578
579/*
580 * Because preemptible RCU does not exist, it never has any callbacks
581 * to remove.
582 */
583static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
584{
585}
586
587/*
588 * Because preemptible RCU does not exist, it never has any callbacks
589 * to process.
590 */
591static void rcu_preempt_process_callbacks(void)
592{
593}
594
595#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
596
25#ifdef CONFIG_DEBUG_LOCK_ALLOC 597#ifdef CONFIG_DEBUG_LOCK_ALLOC
26 598
27#include <linux/kernel_stat.h> 599#include <linux/kernel_stat.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 2e2726d790b..729710273dc 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -303,6 +303,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp)
303 mdelay(longdelay_ms); 303 mdelay(longdelay_ms);
304 if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) 304 if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
305 udelay(shortdelay_us); 305 udelay(shortdelay_us);
306#ifdef CONFIG_PREEMPT
307 if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000)))
308 preempt_schedule(); /* No QS if preempt_disable() in effect */
309#endif
306} 310}
307 311
308static void rcu_torture_read_unlock(int idx) __releases(RCU) 312static void rcu_torture_read_unlock(int idx) __releases(RCU)
@@ -536,6 +540,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp)
536 delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); 540 delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
537 if (!delay) 541 if (!delay)
538 schedule_timeout_interruptible(longdelay); 542 schedule_timeout_interruptible(longdelay);
543 else
544 rcu_read_delay(rrsp);
539} 545}
540 546
541static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) 547static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d5bc43976c5..42140a860bb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -143,6 +143,11 @@ module_param(blimit, int, 0);
143module_param(qhimark, int, 0); 143module_param(qhimark, int, 0);
144module_param(qlowmark, int, 0); 144module_param(qlowmark, int, 0);
145 145
146#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
147int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
148module_param(rcu_cpu_stall_suppress, int, 0644);
149#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
150
146static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 151static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
147static int rcu_pending(int cpu); 152static int rcu_pending(int cpu);
148 153
@@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
450 455
451#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 456#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
452 457
453int rcu_cpu_stall_panicking __read_mostly; 458int rcu_cpu_stall_suppress __read_mostly;
454 459
455static void record_gp_stall_check_time(struct rcu_state *rsp) 460static void record_gp_stall_check_time(struct rcu_state *rsp)
456{ 461{
@@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
482 rcu_print_task_stall(rnp); 487 rcu_print_task_stall(rnp);
483 raw_spin_unlock_irqrestore(&rnp->lock, flags); 488 raw_spin_unlock_irqrestore(&rnp->lock, flags);
484 489
485 /* OK, time to rat on our buddy... */ 490 /*
486 491 * OK, time to rat on our buddy...
492 * See Documentation/RCU/stallwarn.txt for info on how to debug
493 * RCU CPU stall warnings.
494 */
487 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", 495 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
488 rsp->name); 496 rsp->name);
489 rcu_for_each_leaf_node(rsp, rnp) { 497 rcu_for_each_leaf_node(rsp, rnp) {
@@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
512 unsigned long flags; 520 unsigned long flags;
513 struct rcu_node *rnp = rcu_get_root(rsp); 521 struct rcu_node *rnp = rcu_get_root(rsp);
514 522
523 /*
524 * OK, time to rat on ourselves...
525 * See Documentation/RCU/stallwarn.txt for info on how to debug
526 * RCU CPU stall warnings.
527 */
515 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 528 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
516 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 529 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
517 trigger_all_cpu_backtrace(); 530 trigger_all_cpu_backtrace();
@@ -530,7 +543,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
530 long delta; 543 long delta;
531 struct rcu_node *rnp; 544 struct rcu_node *rnp;
532 545
533 if (rcu_cpu_stall_panicking) 546 if (rcu_cpu_stall_suppress)
534 return; 547 return;
535 delta = jiffies - rsp->jiffies_stall; 548 delta = jiffies - rsp->jiffies_stall;
536 rnp = rdp->mynode; 549 rnp = rdp->mynode;
@@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
548 561
549static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 562static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
550{ 563{
551 rcu_cpu_stall_panicking = 1; 564 rcu_cpu_stall_suppress = 1;
552 return NOTIFY_DONE; 565 return NOTIFY_DONE;
553} 566}
554 567
568/**
569 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
570 *
571 * Set the stall-warning timeout way off into the future, thus preventing
572 * any RCU CPU stall-warning messages from appearing in the current set of
573 * RCU grace periods.
574 *
575 * The caller must disable hard irqs.
576 */
577void rcu_cpu_stall_reset(void)
578{
579 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
580 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
581 rcu_preempt_stall_reset();
582}
583
555static struct notifier_block rcu_panic_block = { 584static struct notifier_block rcu_panic_block = {
556 .notifier_call = rcu_panic, 585 .notifier_call = rcu_panic,
557}; 586};
@@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
571{ 600{
572} 601}
573 602
603void rcu_cpu_stall_reset(void)
604{
605}
606
574static void __init check_cpu_stall_init(void) 607static void __init check_cpu_stall_init(void)
575{ 608{
576} 609}
@@ -712,7 +745,7 @@ static void
712rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 745rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
713 __releases(rcu_get_root(rsp)->lock) 746 __releases(rcu_get_root(rsp)->lock)
714{ 747{
715 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 748 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
716 struct rcu_node *rnp = rcu_get_root(rsp); 749 struct rcu_node *rnp = rcu_get_root(rsp);
717 750
718 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { 751 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
@@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
960static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) 993static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
961{ 994{
962 int i; 995 int i;
963 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 996 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
964 997
965 if (rdp->nxtlist == NULL) 998 if (rdp->nxtlist == NULL)
966 return; /* irqs disabled, so comparison is stable. */ 999 return; /* irqs disabled, so comparison is stable. */
@@ -984,7 +1017,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
984 struct rcu_data *rdp; 1017 struct rcu_data *rdp;
985 1018
986 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1019 raw_spin_lock_irqsave(&rsp->onofflock, flags);
987 rdp = rsp->rda[smp_processor_id()]; 1020 rdp = this_cpu_ptr(rsp->rda);
988 if (rsp->orphan_cbs_list == NULL) { 1021 if (rsp->orphan_cbs_list == NULL) {
989 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 1022 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
990 return; 1023 return;
@@ -1007,7 +1040,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1007 unsigned long flags; 1040 unsigned long flags;
1008 unsigned long mask; 1041 unsigned long mask;
1009 int need_report = 0; 1042 int need_report = 0;
1010 struct rcu_data *rdp = rsp->rda[cpu]; 1043 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1011 struct rcu_node *rnp; 1044 struct rcu_node *rnp;
1012 1045
1013 /* Exclude any attempts to start a new grace period. */ 1046 /* Exclude any attempts to start a new grace period. */
@@ -1226,7 +1259,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1226 cpu = rnp->grplo; 1259 cpu = rnp->grplo;
1227 bit = 1; 1260 bit = 1;
1228 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1261 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1229 if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) 1262 if ((rnp->qsmask & bit) != 0 &&
1263 f(per_cpu_ptr(rsp->rda, cpu)))
1230 mask |= bit; 1264 mask |= bit;
1231 } 1265 }
1232 if (mask != 0) { 1266 if (mask != 0) {
@@ -1402,7 +1436,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1402 * a quiescent state betweentimes. 1436 * a quiescent state betweentimes.
1403 */ 1437 */
1404 local_irq_save(flags); 1438 local_irq_save(flags);
1405 rdp = rsp->rda[smp_processor_id()]; 1439 rdp = this_cpu_ptr(rsp->rda);
1406 rcu_process_gp_end(rsp, rdp); 1440 rcu_process_gp_end(rsp, rdp);
1407 check_for_new_grace_period(rsp, rdp); 1441 check_for_new_grace_period(rsp, rdp);
1408 1442
@@ -1701,7 +1735,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1701{ 1735{
1702 unsigned long flags; 1736 unsigned long flags;
1703 int i; 1737 int i;
1704 struct rcu_data *rdp = rsp->rda[cpu]; 1738 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1705 struct rcu_node *rnp = rcu_get_root(rsp); 1739 struct rcu_node *rnp = rcu_get_root(rsp);
1706 1740
1707 /* Set up local state, ensuring consistent view of global state. */ 1741 /* Set up local state, ensuring consistent view of global state. */
@@ -1729,7 +1763,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1729{ 1763{
1730 unsigned long flags; 1764 unsigned long flags;
1731 unsigned long mask; 1765 unsigned long mask;
1732 struct rcu_data *rdp = rsp->rda[cpu]; 1766 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1733 struct rcu_node *rnp = rcu_get_root(rsp); 1767 struct rcu_node *rnp = rcu_get_root(rsp);
1734 1768
1735 /* Set up local state, ensuring consistent view of global state. */ 1769 /* Set up local state, ensuring consistent view of global state. */
@@ -1865,7 +1899,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
1865/* 1899/*
1866 * Helper function for rcu_init() that initializes one rcu_state structure. 1900 * Helper function for rcu_init() that initializes one rcu_state structure.
1867 */ 1901 */
1868static void __init rcu_init_one(struct rcu_state *rsp) 1902static void __init rcu_init_one(struct rcu_state *rsp,
1903 struct rcu_data __percpu *rda)
1869{ 1904{
1870 static char *buf[] = { "rcu_node_level_0", 1905 static char *buf[] = { "rcu_node_level_0",
1871 "rcu_node_level_1", 1906 "rcu_node_level_1",
@@ -1918,37 +1953,23 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1918 } 1953 }
1919 } 1954 }
1920 1955
1956 rsp->rda = rda;
1921 rnp = rsp->level[NUM_RCU_LVLS - 1]; 1957 rnp = rsp->level[NUM_RCU_LVLS - 1];
1922 for_each_possible_cpu(i) { 1958 for_each_possible_cpu(i) {
1923 while (i > rnp->grphi) 1959 while (i > rnp->grphi)
1924 rnp++; 1960 rnp++;
1925 rsp->rda[i]->mynode = rnp; 1961 per_cpu_ptr(rsp->rda, i)->mynode = rnp;
1926 rcu_boot_init_percpu_data(i, rsp); 1962 rcu_boot_init_percpu_data(i, rsp);
1927 } 1963 }
1928} 1964}
1929 1965
1930/*
1931 * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
1932 * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
1933 * structure.
1934 */
1935#define RCU_INIT_FLAVOR(rsp, rcu_data) \
1936do { \
1937 int i; \
1938 \
1939 for_each_possible_cpu(i) { \
1940 (rsp)->rda[i] = &per_cpu(rcu_data, i); \
1941 } \
1942 rcu_init_one(rsp); \
1943} while (0)
1944
1945void __init rcu_init(void) 1966void __init rcu_init(void)
1946{ 1967{
1947 int cpu; 1968 int cpu;
1948 1969
1949 rcu_bootup_announce(); 1970 rcu_bootup_announce();
1950 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); 1971 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
1951 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); 1972 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
1952 __rcu_init_preempt(); 1973 __rcu_init_preempt();
1953 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1974 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1954 1975
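
The recurring rsp->rda changes in this file convert the per-CPU rcu_data bookkeeping from a fixed NR_CPUS-sized array of pointers (rsp->rda[cpu]) to a genuine __percpu pointer. A rough sketch of the resulting access pattern; the local variable names are illustrative only:

	/* rsp->rda is now "struct rcu_data __percpu *", so accesses go
	 * through the percpu API rather than array indexing:
	 */
	struct rcu_data *my_rdp  = this_cpu_ptr(rsp->rda);	/* current CPU */
	struct rcu_data *cpu_rdp = per_cpu_ptr(rsp->rda, cpu);	/* a given CPU */
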
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 14c040b18ed..7918ba61873 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -254,19 +254,23 @@ struct rcu_data {
254#define RCU_STALL_DELAY_DELTA 0 254#define RCU_STALL_DELAY_DELTA 0
255#endif 255#endif
256 256
257#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) 257#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
258 RCU_STALL_DELAY_DELTA)
258 /* for rsp->jiffies_stall */ 259 /* for rsp->jiffies_stall */
259#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) 260#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
260 /* for rsp->jiffies_stall */ 261 /* for rsp->jiffies_stall */
261#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 262#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
262 /* to take at least one */ 263 /* to take at least one */
263 /* scheduling clock irq */ 264 /* scheduling clock irq */
264 /* before ratting on them. */ 265 /* before ratting on them. */
265 266
266#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 267#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
268#define RCU_CPU_STALL_SUPPRESS_INIT 0
269#else
270#define RCU_CPU_STALL_SUPPRESS_INIT 1
271#endif
267 272
268#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) 273#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
269#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
270 274
271/* 275/*
272 * RCU global state, including node hierarchy. This hierarchy is 276 * RCU global state, including node hierarchy. This hierarchy is
@@ -283,7 +287,7 @@ struct rcu_state {
283 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ 287 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
284 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 288 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
285 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ 289 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
 286 struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ 290 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
287 291
288 /* The following fields are guarded by the root rcu_node's lock. */ 292 /* The following fields are guarded by the root rcu_node's lock. */
289 293
@@ -365,6 +369,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
365#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 369#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
366static void rcu_print_detail_task_stall(struct rcu_state *rsp); 370static void rcu_print_detail_task_stall(struct rcu_state *rsp);
367static void rcu_print_task_stall(struct rcu_node *rnp); 371static void rcu_print_task_stall(struct rcu_node *rnp);
372static void rcu_preempt_stall_reset(void);
368#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 373#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
369static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 374static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
370#ifdef CONFIG_HOTPLUG_CPU 375#ifdef CONFIG_HOTPLUG_CPU
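
As a worked example of the new stall-timeout arithmetic, assuming the Kconfig default of CONFIG_RCU_CPU_STALL_TIMEOUT=60 (from the Kconfig.debug hunk later in this patch) and RCU_STALL_DELAY_DELTA of 0 (its usual value):

	RCU_SECONDS_TILL_STALL_CHECK   = 60 * HZ + 0		(about  60 seconds)
	RCU_SECONDS_TILL_STALL_RECHECK = 3 * (60 * HZ) + 30	(about 180 seconds)
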
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 0e4f420245d..e9e0bc74ff3 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu)
154 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 154 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
155 155
156 /* Possibly blocking in an RCU read-side critical section. */ 156 /* Possibly blocking in an RCU read-side critical section. */
157 rdp = rcu_preempt_state.rda[cpu]; 157 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
158 rnp = rdp->mynode; 158 rnp = rdp->mynode;
159 raw_spin_lock_irqsave(&rnp->lock, flags); 159 raw_spin_lock_irqsave(&rnp->lock, flags);
160 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 160 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu)
201 */ 201 */
202void __rcu_read_lock(void) 202void __rcu_read_lock(void)
203{ 203{
204 ACCESS_ONCE(current->rcu_read_lock_nesting)++; 204 current->rcu_read_lock_nesting++;
205 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ 205 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
206} 206}
207EXPORT_SYMBOL_GPL(__rcu_read_lock); 207EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -344,7 +344,9 @@ void __rcu_read_unlock(void)
344 struct task_struct *t = current; 344 struct task_struct *t = current;
345 345
346 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ 346 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
347 if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && 347 --t->rcu_read_lock_nesting;
348 barrier(); /* decrement before load of ->rcu_read_unlock_special */
349 if (t->rcu_read_lock_nesting == 0 &&
348 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 350 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
349 rcu_read_unlock_special(t); 351 rcu_read_unlock_special(t);
350#ifdef CONFIG_PROVE_LOCKING 352#ifdef CONFIG_PROVE_LOCKING
@@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
417 } 419 }
418} 420}
419 421
422/*
423 * Suppress preemptible RCU's CPU stall warnings by pushing the
424 * time of the next stall-warning message comfortably far into the
425 * future.
426 */
427static void rcu_preempt_stall_reset(void)
428{
429 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
430}
431
420#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 432#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
421 433
422/* 434/*
@@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu);
546 * 558 *
547 * Control will return to the caller some time after a full grace 559 * Control will return to the caller some time after a full grace
548 * period has elapsed, in other words after all currently executing RCU 560 * period has elapsed, in other words after all currently executing RCU
549 * read-side critical sections have completed. RCU read-side critical 561 * read-side critical sections have completed. Note, however, that
550 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 562 * upon return from synchronize_rcu(), the caller might well be executing
551 * and may be nested. 563 * concurrently with new RCU read-side critical sections that began while
564 * synchronize_rcu() was waiting. RCU read-side critical sections are
565 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
552 */ 566 */
553void synchronize_rcu(void) 567void synchronize_rcu(void)
554{ 568{
@@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void)
771 */ 785 */
772static void __init __rcu_init_preempt(void) 786static void __init __rcu_init_preempt(void)
773{ 787{
774 RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); 788 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
775} 789}
776 790
777/* 791/*
@@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
865{ 879{
866} 880}
867 881
882/*
883 * Because preemptible RCU does not exist, there is no need to suppress
884 * its CPU stall warnings.
885 */
886static void rcu_preempt_stall_reset(void)
887{
888}
889
868#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 890#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
869 891
870/* 892/*
@@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void)
919} 941}
920 942
921/* 943/*
922 * In classic RCU, call_rcu() is just call_rcu_sched().
923 */
924void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
925{
926 call_rcu_sched(head, func);
927}
928EXPORT_SYMBOL_GPL(call_rcu);
929
930/*
931 * Wait for an rcu-preempt grace period, but make it happen quickly. 944 * Wait for an rcu-preempt grace period, but make it happen quickly.
932 * But because preemptable RCU does not exist, map to rcu-sched. 945 * But because preemptable RCU does not exist, map to rcu-sched.
933 */ 946 */
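
One consequence of the reworked __rcu_read_unlock() above is that only the outermost unlock, the one that brings ->rcu_read_lock_nesting back to zero, ever consults ->rcu_read_unlock_special, so nested read-side critical sections stay cheap. An illustrative (hypothetical) nested reader:

	rcu_read_lock();		/* nesting 0 -> 1 */
	p = rcu_dereference(gp);	/* gp is a hypothetical __rcu pointer */
	rcu_read_lock();		/* nesting 1 -> 2 */
	/* ... inner read-side work ... */
	rcu_read_unlock();		/* nesting 2 -> 1: no special handling */
	rcu_read_unlock();		/* nesting 1 -> 0: special cases checked */
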
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 36c95b45738..458e032a3a3 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -262,7 +262,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
262 struct rcu_data *rdp; 262 struct rcu_data *rdp;
263 263
264 for_each_possible_cpu(cpu) { 264 for_each_possible_cpu(cpu) {
265 rdp = rsp->rda[cpu]; 265 rdp = per_cpu_ptr(rsp->rda, cpu);
266 if (rdp->beenonline) 266 if (rdp->beenonline)
267 print_one_rcu_pending(m, rdp); 267 print_one_rcu_pending(m, rdp);
268 } 268 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1b4afd2e6ca..52c2172dff1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY
539 disabling, allowing multiple RCU-lockdep warnings to be printed 539 disabling, allowing multiple RCU-lockdep warnings to be printed
540 on a single reboot. 540 on a single reboot.
541 541
542config SPARSE_RCU_POINTER
543 bool "RCU debugging: sparse-based checks for pointer usage"
544 default n
545 help
546 This feature enables the __rcu sparse annotation for
547 RCU-protected pointers. This annotation will cause sparse
548 to flag any non-RCU used of annotated pointers. This can be
549 helpful when debugging RCU usage. Please note that this feature
550 is not intended to enforce code cleanliness; it is instead merely
551 a debugging aid.
552
 553 Say Y to make sparse flag questionable use of RCU-protected pointers.
554
542 Say N if you are unsure. 555 Say N if you are unsure.
543 556
544config LOCKDEP 557config LOCKDEP
@@ -832,6 +845,30 @@ config RCU_CPU_STALL_DETECTOR
832 845
833 Say Y if you are unsure. 846 Say Y if you are unsure.
834 847
848config RCU_CPU_STALL_TIMEOUT
849 int "RCU CPU stall timeout in seconds"
850 depends on RCU_CPU_STALL_DETECTOR
851 range 3 300
852 default 60
853 help
 854 If a given RCU grace period extends for more than the specified
855 number of seconds, a CPU stall warning is printed. If the
856 RCU grace period persists, additional CPU stall warnings are
857 printed at more widely spaced intervals.
858
859config RCU_CPU_STALL_DETECTOR_RUNNABLE
860 bool "RCU CPU stall checking starts automatically at boot"
861 depends on RCU_CPU_STALL_DETECTOR
862 default y
863 help
864 If set, start checking for RCU CPU stalls immediately on
865 boot. Otherwise, RCU CPU stall checking must be manually
866 enabled.
867
868 Say Y if you are unsure.
869
870 Say N if you wish to suppress RCU CPU stall checking during boot.
871
835config RCU_CPU_STALL_VERBOSE 872config RCU_CPU_STALL_VERBOSE
836 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" 873 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
837 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU 874 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
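
The new SPARSE_RCU_POINTER option above checks the __rcu annotations that this patch adds to the radix-tree and netfilter pointers below. A minimal sketch of what the annotation expects from callers (all names here are hypothetical):

	struct bar __rcu *global_bar;		/* RCU-protected pointer */

	/* Update side: publish the new value with rcu_assign_pointer(). */
	rcu_assign_pointer(global_bar, new_bar);

	/* Read side: fetch it with rcu_dereference() inside a read-side
	 * critical section; a plain "p = global_bar" would now draw a
	 * sparse warning.
	 */
	rcu_read_lock();
	p = rcu_dereference(global_bar);
	if (p)
		use_bar(p);
	rcu_read_unlock();
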
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 5b7d4623f0b..0ccbcdf7500 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -49,7 +49,7 @@ struct radix_tree_node {
49 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
50 unsigned int count; 50 unsigned int count;
51 struct rcu_head rcu_head; 51 struct rcu_head rcu_head;
52 void *slots[RADIX_TREE_MAP_SIZE]; 52 void __rcu *slots[RADIX_TREE_MAP_SIZE];
53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
54}; 54};
55 55
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 8c8632d9b93..957c9241fb0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock);
38static struct nf_conntrack_l3proto *l3proto __read_mostly; 38static struct nf_conntrack_l3proto *l3proto __read_mostly;
39 39
40#define MAX_IP_NAT_PROTO 256 40#define MAX_IP_NAT_PROTO 256
41static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] 41static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
42 __read_mostly; 42 __read_mostly;
43 43
44static inline const struct nf_nat_protocol * 44static inline const struct nf_nat_protocol *
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 78b505d33bf..fdaec7daff1 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -27,7 +27,7 @@
27 27
28static DEFINE_MUTEX(afinfo_mutex); 28static DEFINE_MUTEX(afinfo_mutex);
29 29
30const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; 30const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
31EXPORT_SYMBOL(nf_afinfo); 31EXPORT_SYMBOL(nf_afinfo);
32 32
33int nf_register_afinfo(const struct nf_afinfo *afinfo) 33int nf_register_afinfo(const struct nf_afinfo *afinfo)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index cdcc7649476..5702de35e2b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -26,10 +26,10 @@
26 26
27static DEFINE_MUTEX(nf_ct_ecache_mutex); 27static DEFINE_MUTEX(nf_ct_ecache_mutex);
28 28
29struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; 29struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
30EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); 30EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
31 31
32struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; 32struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
33EXPORT_SYMBOL_GPL(nf_expect_event_cb); 33EXPORT_SYMBOL_GPL(nf_expect_event_cb);
34 34
35/* deliver cached events and clear cache entry - must be called with locally 35/* deliver cached events and clear cache entry - must be called with locally
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 7dcf7a40419..1d9bdae0616 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -16,7 +16,7 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <net/netfilter/nf_conntrack_extend.h> 17#include <net/netfilter/nf_conntrack_extend.h>
18 18
19static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; 19static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
20static DEFINE_MUTEX(nf_ct_ext_type_mutex); 20static DEFINE_MUTEX(nf_ct_ext_type_mutex);
21 21
22void __nf_ct_ext_destroy(struct nf_conn *ct) 22void __nf_ct_ext_destroy(struct nf_conn *ct)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 5886ba1d52a..ed6d9295802 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,8 +28,8 @@
28#include <net/netfilter/nf_conntrack_l4proto.h> 28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_core.h> 29#include <net/netfilter/nf_conntrack_core.h>
30 30
31static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; 31static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
32struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; 32struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
33EXPORT_SYMBOL_GPL(nf_ct_l3protos); 33EXPORT_SYMBOL_GPL(nf_ct_l3protos);
34 34
35static DEFINE_MUTEX(nf_ct_proto_mutex); 35static DEFINE_MUTEX(nf_ct_proto_mutex);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 7df37fd786b..b07393eab88 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,7 @@
16#define NF_LOG_PREFIXLEN 128 16#define NF_LOG_PREFIXLEN 128
17#define NFLOGGER_NAME_LEN 64 17#define NFLOGGER_NAME_LEN 64
18 18
19static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; 19static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
20static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; 20static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
21static DEFINE_MUTEX(nf_log_mutex); 21static DEFINE_MUTEX(nf_log_mutex);
22 22
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 78b3cf9c519..74aebed5bd2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -18,7 +18,7 @@
 18 * long term mutex. The handler must provide an outfn() to accept packets 18 * long term mutex. The handler must provide an outfn() to accept packets
19 * for queueing and must reinject all packets it receives, no matter what. 19 * for queueing and must reinject all packets it receives, no matter what.
20 */ 20 */
21static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; 21static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
22 22
23static DEFINE_MUTEX(queue_handler_mutex); 23static DEFINE_MUTEX(queue_handler_mutex);
24 24