-rw-r--r--  Documentation/RCU/00-INDEX | 2
-rw-r--r--  Documentation/RCU/trace.txt | 413
-rw-r--r--  Documentation/kernel-parameters.txt | 11
-rw-r--r--  Documentation/lockstat.txt | 51
-rw-r--r--  Documentation/sound/alsa/ALSA-Configuration.txt | 2
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/mips/include/asm/byteorder.h | 5
-rw-r--r--  arch/mips/include/asm/elf.h | 2
-rw-r--r--  arch/parisc/include/asm/tlbflush.h | 5
-rw-r--r--  arch/powerpc/platforms/pseries/rtasd.c | 4
-rw-r--r--  arch/um/include/asm/system.h | 14
-rw-r--r--  arch/x86/Kconfig.cpu | 1
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 2
-rw-r--r--  arch/x86/include/asm/iommu.h | 2
-rw-r--r--  arch/x86/include/asm/pci.h | 2
-rw-r--r--  arch/x86/include/asm/pci_64.h | 1
-rw-r--r--  arch/x86/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 8
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 6
-rw-r--r--  arch/x86/kernel/Makefile | 3
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 5
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 7
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 3
-rw-r--r--  arch/x86/kernel/microcode_core.c | 19
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 6
-rw-r--r--  arch/x86/kernel/pci-dma.c | 13
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 4
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 29
-rw-r--r--  arch/x86/lib/usercopy_32.c | 8
-rw-r--r--  arch/x86/lib/usercopy_64.c | 4
-rw-r--r--  arch/x86/mm/init_32.c | 3
-rw-r--r--  drivers/acpi/bus.c | 8
-rw-r--r--  drivers/acpi/utilities/utglobal.c | 2
-rw-r--r--  drivers/block/cciss.c | 5
-rw-r--r--  drivers/edac/edac_device.c | 12
-rw-r--r--  drivers/gpu/drm/i915/i915_dma.c | 10
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_irq.c | 4
-rw-r--r--  drivers/ide/cs5530.c | 3
-rw-r--r--  drivers/ide/sc1200.c | 3
-rw-r--r--  drivers/md/bitmap.c | 22
-rw-r--r--  drivers/media/dvb/b2c2/Kconfig | 2
-rw-r--r--  drivers/media/dvb/b2c2/flexcop-fe-tuner.c | 2
-rw-r--r--  drivers/media/dvb/b2c2/flexcop-i2c.c | 6
-rw-r--r--  drivers/media/dvb/bt8xx/Kconfig | 2
-rw-r--r--  drivers/media/dvb/dvb-usb/Kconfig | 46
-rw-r--r--  drivers/media/dvb/dvb-usb/dib0700_devices.c | 6
-rw-r--r--  drivers/media/dvb/ttpci/Kconfig | 2
-rw-r--r--  drivers/media/video/compat_ioctl32.c | 2
-rw-r--r--  drivers/media/video/cx18/Kconfig | 2
-rw-r--r--  drivers/media/video/cx23885/Kconfig | 4
-rw-r--r--  drivers/media/video/cx88/Kconfig | 2
-rw-r--r--  drivers/media/video/em28xx/em28xx-video.c | 3
-rw-r--r--  drivers/media/video/gspca/gspca.c | 4
-rw-r--r--  drivers/media/video/pvrusb2/Kconfig | 2
-rw-r--r--  drivers/media/video/saa7134/Kconfig | 4
-rw-r--r--  drivers/message/fusion/mptscsih.c | 3
-rw-r--r--  drivers/net/ppp_generic.c | 28
-rw-r--r--  drivers/pci/hotplug/acpiphp.h | 2
-rw-r--r--  drivers/pci/hotplug/acpiphp_core.c | 2
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c | 4
-rw-r--r--  drivers/pci/hotplug/ibmphp_core.c | 5
-rw-r--r--  drivers/pci/hotplug/pciehp_core.c | 23
-rw-r--r--  drivers/pci/pcie/aer/aerdrv_core.c | 2
-rw-r--r--  drivers/rtc/rtc-isl1208.c | 7
-rw-r--r--  drivers/scsi/aacraid/linit.c | 4
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvstgt.c | 16
-rw-r--r--  drivers/scsi/libiscsi.c | 11
-rw-r--r--  drivers/scsi/scsi_lib.c | 2
-rw-r--r--  drivers/w1/w1_io.c | 4
-rw-r--r--  fs/9p/fid.c | 4
-rw-r--r--  fs/9p/v9fs.c | 2
-rw-r--r--  fs/9p/vfs_dentry.c | 9
-rw-r--r--  fs/9p/vfs_inode.c | 6
-rw-r--r--  include/acpi/acoutput.h | 2
-rw-r--r--  include/acpi/platform/aclinux.h | 6
-rw-r--r--  include/asm-generic/bug.h | 7
-rw-r--r--  include/linux/bottom_half.h | 1
-rw-r--r--  include/linux/debug_locks.h | 2
-rw-r--r--  include/linux/futex.h | 5
-rw-r--r--  include/linux/hardirq.h | 14
-rw-r--r--  include/linux/kernel.h | 11
-rw-r--r--  include/linux/lockdep.h | 43
-rw-r--r--  include/linux/mutex.h | 2
-rw-r--r--  include/linux/rcuclassic.h | 2
-rw-r--r--  include/linux/rcupdate.h | 10
-rw-r--r--  include/linux/rcutree.h | 329
-rw-r--r--  include/linux/swiotlb.h | 22
-rw-r--r--  include/linux/uaccess.h | 2
-rw-r--r--  init/Kconfig | 86
-rw-r--r--  kernel/Kconfig.preempt | 25
-rw-r--r--  kernel/Makefile | 6
-rw-r--r--  kernel/cgroup.c | 8
-rw-r--r--  kernel/exit.c | 2
-rw-r--r--  kernel/extable.c | 16
-rw-r--r--  kernel/futex.c | 351
-rw-r--r--  kernel/irq/manage.c | 12
-rw-r--r--  kernel/lockdep.c | 60
-rw-r--r--  kernel/lockdep_proc.c | 28
-rw-r--r--  kernel/mutex.c | 10
-rw-r--r--  kernel/notifier.c | 8
-rw-r--r--  kernel/panic.c | 32
-rw-r--r--  kernel/posix-cpu-timers.c | 10
-rw-r--r--  kernel/posix-timers.c | 6
-rw-r--r--  kernel/rcuclassic.c | 4
-rw-r--r--  kernel/rcupreempt.c | 10
-rw-r--r--  kernel/rcupreempt_trace.c | 10
-rw-r--r--  kernel/rcutorture.c | 66
-rw-r--r--  kernel/rcutree.c | 1535
-rw-r--r--  kernel/rcutree_trace.c | 271
-rw-r--r--  kernel/resource.c | 9
-rw-r--r--  kernel/sched.c | 3
-rw-r--r--  kernel/softirq.c | 19
-rw-r--r--  kernel/softlockup.c | 2
-rw-r--r--  kernel/stacktrace.c | 11
-rw-r--r--  kernel/sys.c | 2
-rw-r--r--  lib/Kconfig.debug | 31
-rw-r--r--  lib/debugobjects.c | 4
-rw-r--r--  lib/swiotlb.c | 255
-rw-r--r--  mm/memory.c | 15
-rw-r--r--  net/bluetooth/rfcomm/core.c | 2
-rw-r--r--  net/socket.c | 1
-rw-r--r--  sound/pci/hda/patch_sigmatel.c | 23
-rw-r--r--  sound/soc/omap/omap-pcm.c | 2
126 files changed, 3688 insertions(+), 649 deletions(-)
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 461481dfb7c3..7dc0695a8f90 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -16,6 +16,8 @@ RTFP.txt
 	- List of RCU papers (bibliography) going back to 1980.
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
+trace.txt
+	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
new file mode 100644
index 000000000000..068848240a8b
--- /dev/null
+++ b/Documentation/RCU/trace.txt
@@ -0,0 +1,413 @@
+CONFIG_RCU_TRACE debugfs Files and Formats
+
+
+The rcupreempt and rcutree implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+Note that the rcuclassic implementation of RCU does not provide debugfs
+trace output.
+
+The following sections describe the debugfs files and formats for
+preemptable RCU (rcupreempt) and hierarchical RCU (rcutree).
+
+
+Preemptable RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcuctrs (which displays the per-CPU
+counters used by preemptable RCU), rcu/rcugp (which displays grace-period
+counters), and rcu/rcustats (which displays internal counters for debugging RCU).
+
+The output of "cat rcu/rcuctrs" looks as follows:
+
+CPU last cur F M
+  0    5  -5 0 0
+  1   -1   0 0 0
+  2    0   1 0 0
+  3    0   1 0 0
+  4    0   1 0 0
+  5    0   1 0 0
+  6    0   2 0 0
+  7    0  -1 0 0
+  8    0   1 0 0
+ggp = 26226, state = waitzero
+
+The per-CPU fields are as follows:
+
+o "CPU" gives the CPU number.  Offline CPUs are not displayed.
+
+o "last" gives the value of the counter that is being decremented
+        for the current grace period phase.  In the example above,
+        the counters sum to 4, indicating that there are still four
+        RCU read-side critical sections running that started before
+        the last counter flip.
+
+o "cur" gives the value of the counter that is currently being
+        both incremented (by rcu_read_lock()) and decremented (by
+        rcu_read_unlock()).  In the example above, the counters sum to
+        1, indicating that there is only one RCU read-side critical section
+        still running that started after the last counter flip.
+
+o "F" indicates whether RCU is waiting for this CPU to acknowledge
+        a counter flip.  In the above example, RCU is not waiting on any,
+        which is consistent with the state being "waitzero" rather than
+        "waitack".
+
+o "M" indicates whether RCU is waiting for this CPU to execute a
+        memory barrier.  In the above example, RCU is not waiting on any,
+        which is consistent with the state being "waitzero" rather than
+        "waitmb".
+
+o "ggp" is the global grace-period counter.
+
+o "state" is the RCU state, which can be one of the following:
+
+        o "idle": there is no grace period in progress.
+
+        o "waitack": RCU just incremented the global grace-period
+                counter, which has the effect of reversing the roles of
+                the "last" and "cur" counters above, and is waiting for
+                all the CPUs to acknowledge the flip.  Once the flip has
+                been acknowledged, CPUs will no longer be incrementing
+                what are now the "last" counters, so that their sum will
+                decrease monotonically down to zero.
+
+        o "waitzero": RCU is waiting for the sum of the "last" counters
+                to decrease to zero.
+
+        o "waitmb": RCU is waiting for each CPU to execute a memory
+                barrier, which ensures that instructions from a given CPU's
+                last RCU read-side critical section cannot be reordered
+                with instructions following the memory-barrier instruction.
+
+The output of "cat rcu/rcugp" looks as follows:
+
+oldggp=48870  newggp=48873
+
+Note that reading from this file provokes a synchronize_rcu().  The
+"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before
+executing the synchronize_rcu(), and the "newggp" value is also the
+"ggp" value, but taken after the synchronize_rcu() command returns.
+
+
+The output of "cat rcu/rcustats" looks as follows:
+
+na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871
+1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640
+z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639
+
+These are counters tracking internal preemptable-RCU events; however,
+some of them may be useful for debugging algorithms using RCU.  In
+particular, the "nl", "wl", and "dl" values track the number of RCU
+callbacks in various states.  The fields are as follows:
+
+o "na" is the total number of RCU callbacks that have been enqueued
+        since boot.
+
+o "nl" is the number of RCU callbacks waiting for the previous
+        grace period to end so that they can start waiting on the next
+        grace period.
+
+o "wa" is the total number of RCU callbacks that have started waiting
+        for a grace period since boot.  "na" should be roughly equal to
+        "nl" plus "wa".
+
+o "wl" is the number of RCU callbacks currently waiting for their
+        grace period to end.
+
+o "da" is the total number of RCU callbacks whose grace periods
+        have completed since boot.  "wa" should be roughly equal to
+        "wl" plus "da".
+
+o "dr" is the total number of RCU callbacks that have been removed
+        from the list of callbacks ready to invoke.  "dr" should be roughly
+        equal to "da".
+
+o "di" is the total number of RCU callbacks that have been invoked
+        since boot.  "di" should be roughly equal to "da", though some
+        early versions of preemptable RCU had a bug so that only the
+        last CPU's count of invocations was displayed, rather than the
+        sum of all CPUs' counts.
+
+o "1" is the number of calls to rcu_try_flip().  This should be
+        roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1"
+        described below.  In other words, the number of times that
+        the state machine is visited should be equal to the sum of the
+        number of times that each state is visited plus the number of
+        times that the state-machine lock acquisition failed.
+
+o "e1" is the number of times that rcu_try_flip() was unable to
+        acquire the fliplock.
+
+o "i1" is the number of calls to rcu_try_flip_idle().
+
+o "ie1" is the number of times rcu_try_flip_idle() exited early
+        due to the calling CPU having no work for RCU.
+
+o "g1" is the number of times that rcu_try_flip_idle() decided
+        to start a new grace period.  "i1" should be roughly equal to
+        "ie1" plus "g1".
+
+o "a1" is the number of calls to rcu_try_flip_waitack().
+
+o "ae1" is the number of times that rcu_try_flip_waitack() found
+        that at least one CPU had not yet acknowledged the new grace period
+        (AKA "counter flip").
+
157o "a2" is the number of time rcu_try_flip_waitack() found that
+        all CPUs had acknowledged.  "a1" should be roughly equal to
+        "ae1" plus "a2".  (This particular output was collected on
+        a 128-CPU machine, hence the smaller-than-usual fraction of
+        calls to rcu_try_flip_waitack() finding all CPUs having already
+        acknowledged.)
+
+o "z1" is the number of calls to rcu_try_flip_waitzero().
+
+o "ze1" is the number of times that rcu_try_flip_waitzero() found
+        that not all of the old RCU read-side critical sections had
+        completed.
+
+o "z2" is the number of times that rcu_try_flip_waitzero() finds
+        the sum of the counters equal to zero, in other words, that
+        all of the old RCU read-side critical sections had completed.
+        The value of "z1" should be roughly equal to "ze1" plus
+        "z2".
+
+o "m1" is the number of calls to rcu_try_flip_waitmb().
+
+o "me1" is the number of times that rcu_try_flip_waitmb() finds
+        that at least one CPU has not yet executed a memory barrier.
+
+o "m2" is the number of times that rcu_try_flip_waitmb() finds that
+        all CPUs have executed a memory barrier.
+
+
+Hierarchical RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcudata (which displays fields in struct
+rcu_data), rcu/rcugp (which displays grace-period counters), and
+rcu/rcuhier (which displays the struct rcu_node hierarchy).
+
+The output of "cat rcu/rcudata" looks as follows:
+
+rcu:
+  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
+  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
+  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
+  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
+  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
+  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
+  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
+  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu_bh:
+  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
+  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
+  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
+  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+
+The first section lists the rcu_data structures for rcu, the second for
+rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
+The fields are as follows:
+
+o The number at the beginning of each line is the CPU number.
+        CPU numbers followed by an exclamation mark are offline,
+        but have been online at least once since boot.  There will be
+        no output for CPUs that have never been online, which can be
+        a good thing in the surprisingly common case where NR_CPUS is
+        substantially larger than the number of actual CPUs.
+
+o "c" is the count of grace periods that this CPU believes have
+        completed.  CPUs in dynticks idle mode may lag quite a ways
+        behind, for example, CPU 4 under "rcu" above, which has slept
+        through the past 25 RCU grace periods.  It is not unusual to
+        see CPUs lagging by thousands of grace periods.
+
+o "g" is the count of grace periods that this CPU believes have
+        started.  Again, CPUs in dynticks idle mode may lag behind.
+        If the "c" and "g" values are equal, this CPU has already
+        reported a quiescent state for the last RCU grace period that
+        it is aware of; otherwise, the CPU believes that it owes RCU a
+        quiescent state.
+
+o "pq" indicates that this CPU has passed through a quiescent state
+        for the current grace period.  It is possible for "pq" to be
239 "1" and "c" different than "g", which indicates that although
+        the CPU has passed through a quiescent state, either (1) this
+        CPU has not yet reported that fact, (2) some other CPU has not
+        yet reported for this grace period, or (3) both.
+
+o "pqc" indicates which grace period the last-observed quiescent
+        state for this CPU corresponds to.  This is important for handling
+        the race between CPU 0 reporting an extended dynticks-idle
+        quiescent state for CPU 1 and CPU 1 suddenly waking up and
+        reporting its own quiescent state.  If CPU 1 was the last CPU
+        for the current grace period, then the CPU that loses this race
+        will attempt to incorrectly mark CPU 1 as having checked in for
+        the next grace period!
+
+o "qp" indicates that RCU still expects a quiescent state from
+        this CPU.
+
+o "rpfq" is the number of rcu_pending() calls on this CPU required
+        to induce this CPU to invoke force_quiescent_state().
+
259o "rp" is low-order four hex digits of the count of how many times
+        rcu_pending() has been invoked on this CPU.
+
+o "dt" is the current value of the dyntick counter that is incremented
+        when entering or leaving dynticks idle state, either by the
+        scheduler or by irq.  The number after the "/" is the interrupt
+        nesting depth when in dyntick-idle state, or one greater than
+        the interrupt-nesting depth otherwise.
+
+        This field is displayed only for CONFIG_NO_HZ kernels.
+
+o "dn" is the current value of the dyntick counter that is incremented
+        when entering or leaving dynticks idle state via NMI.  If both
+        the "dt" and "dn" values are even, then this CPU is in dynticks
+        idle mode and may be ignored by RCU.  If either of these two
+        counters is odd, then RCU must be alert to the possibility of
+        an RCU read-side critical section running on this CPU.
+
+        This field is displayed only for CONFIG_NO_HZ kernels.
+
+o "df" is the number of times that some other CPU has forced a
+        quiescent state on behalf of this CPU due to this CPU being in
+        dynticks-idle state.
+
+        This field is displayed only for CONFIG_NO_HZ kernels.
+
+o "of" is the number of times that some other CPU has forced a
+        quiescent state on behalf of this CPU due to this CPU being
+        offline.  In a perfect world, this might never happen, but it
+        turns out that offlining and onlining a CPU can take several grace
+        periods, and so there is likely to be an extended period of time
+        when RCU believes that the CPU is online when it really is not.
+        Please note that erring in the other direction (RCU believing a
+        CPU is offline when it is really alive and kicking) is a fatal
+        error, so it makes sense to err conservatively.
+
+o "ri" is the number of times that RCU has seen fit to send a
+        reschedule IPI to this CPU in order to get it to report a
+        quiescent state.
+
+o "ql" is the number of RCU callbacks currently residing on
+        this CPU.  This is the total number of callbacks, regardless
+        of what state they are in (new, waiting for grace period to
+        start, waiting for grace period to end, ready to invoke).
+
+o "b" is the batch limit for this CPU.  If more than this number
+        of RCU callbacks is ready to invoke, then the remainder will
+        be deferred.
+
+
+The output of "cat rcu/rcugp" looks as follows:
+
+rcu: completed=33062  gpnum=33063
+rcu_bh: completed=464  gpnum=464
+
+Again, this output is for both "rcu" and "rcu_bh".  The fields are
+taken from the rcu_state structure, and are as follows:
+
+o "completed" is the number of grace periods that have completed.
+        It is comparable to the "c" field from rcu/rcudata in that a
+        CPU whose "c" field matches the value of "completed" is aware
+        that the corresponding RCU grace period has completed.
+
+o "gpnum" is the number of grace periods that have started.  It is
+        comparable to the "g" field from rcu/rcudata in that a CPU
+        whose "g" field matches the value of "gpnum" is aware that the
+        corresponding RCU grace period has started.
+
+        If these two fields are equal (as they are for "rcu_bh" above),
+        then there is no grace period in progress, in other words, RCU
+        is idle.  On the other hand, if the two fields differ (as they
+        do for "rcu" above), then an RCU grace period is in progress.
+
+
+The output of "cat rcu/rcuhier" looks as follows, with very long lines:
+
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
+1/1 0:127 ^0
+3/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3
+3/3f 0:5 ^0    2/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3
+rcu_bh:
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
+0/1 0:127 ^0
+0/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3
+0/3f 0:5 ^0    0/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3
+
+This is once again split into "rcu" and "rcu_bh" portions.  The fields are
+as follows:
+
+o "c" is exactly the same as "completed" under rcu/rcugp.
+
+o "g" is exactly the same as "gpnum" under rcu/rcugp.
+
+o "s" is the "signaled" state that drives force_quiescent_state()'s
+        state machine.
+
+o "jfq" is the number of jiffies remaining for this grace period
+        before force_quiescent_state() is invoked to help push things
+        along.  Note that CPUs in dyntick-idle mode throughout the grace
+        period will not report on their own, but rather must be checked by
+        some other CPU via force_quiescent_state().
+
+o "j" is the low-order four hex digits of the jiffies counter.
+        Yes, Paul did run into a number of problems that turned out to
+        be due to the jiffies counter no longer counting.  Why do you ask?
+
+o "nfqs" is the number of calls to force_quiescent_state() since
+        boot.
+
+o "nfqsng" is the number of useless calls to force_quiescent_state(),
+        where there wasn't actually a grace period active.  This can
+        happen due to races.  The number in parentheses is the difference
+        between "nfqs" and "nfqsng", or the number of times that
+        force_quiescent_state() actually did some real work.
+
+o "fqlh" is the number of calls to force_quiescent_state() that
+        exited immediately (without even being counted in nfqs above)
+        due to contention on ->fqslock.
+
+o Each element of the form "1/1 0:127 ^0" represents one struct
+        rcu_node.  Each line represents one level of the hierarchy, from
+        root to leaves.  It is best to think of the rcu_data structures
+        as forming yet another level after the leaves.  Note that there
+        might be either one, two, or three levels of rcu_node structures,
+        depending on the relationship between CONFIG_RCU_FANOUT and
+        CONFIG_NR_CPUS.
+
+        o The numbers separated by the "/" are the qsmask followed
+                by the qsmaskinit.  The qsmask will have one bit
+                set for each entity in the next lower level that
+                has not yet checked in for the current grace period.
+                The qsmaskinit will have one bit for each entity that is
+                currently expected to check in during each grace period.
+                The value of qsmaskinit is assigned to that of qsmask
+                at the beginning of each grace period.
+
+                For example, for "rcu", the qsmask of the first entry
+                of the lowest level is 0x14, meaning that we are still
+                waiting for CPUs 2 and 4 to check in for the current
+                grace period.
+
+        o The numbers separated by the ":" are the range of CPUs
+                served by this struct rcu_node.  This can be helpful
+                in working out how the hierarchy is wired together.
+
+                For example, the first entry at the lowest level shows
+                "0:5", indicating that it covers CPUs 0 through 5.
+
+        o The number after the "^" indicates the bit in the
+                next higher level rcu_node structure that this
+                rcu_node structure corresponds to.
+
+                For example, the first entry at the lowest level shows
+                "^0", indicating that it corresponds to bit zero in
+                the first entry at the middle level.
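
A quick way to sanity-check the rcu/rcugp format documented above is to parse
it from userspace.  The following is a minimal sketch, not part of the patch:
it assumes debugfs is mounted at /sys/kernel/debug, and it reports "idle"
when "completed" equals "gpnum", exactly as the description above specifies.

/* Hypothetical userspace helper; path and program are illustrative only. */
#include <stdio.h>

int main(void)
{
	char flavor[16];
	long completed, gpnum;
	FILE *f = fopen("/sys/kernel/debug/rcu/rcugp", "r");

	if (!f) {
		perror("rcugp");
		return 1;
	}
	/* Each line looks like: "rcu: completed=33062 gpnum=33063" */
	while (fscanf(f, "%15[^:]: completed=%ld gpnum=%ld\n",
		      flavor, &completed, &gpnum) == 3)
		printf("%s: %s\n", flavor,
		       gpnum == completed ? "idle"
					  : "grace period in progress");
	fclose(f);
	return 0;
}
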
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e0f346d201ed..c9115c1b672c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -220,14 +220,17 @@ and is between 256 and 4096 characters. It is defined in the file
 			Bits in debug_level correspond to a level in
 			ACPI_DEBUG_PRINT statements, e.g.,
 			    ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
-			See Documentation/acpi/debug.txt for more information
-			about debug layers and levels.
+			The debug_level mask defaults to "info".  See
+			Documentation/acpi/debug.txt for more information about
+			debug layers and levels.
 
+			Enable processor driver info messages:
+			    acpi.debug_layer=0x20000000
+			Enable PCI/PCI interrupt routing info messages:
+			    acpi.debug_layer=0x400000
 			Enable AML "Debug" output, i.e., stores to the Debug
 			object while interpreting AML:
 			    acpi.debug_layer=0xffffffff acpi.debug_level=0x2
-			Enable PCI/PCI interrupt routing info messages:
-			    acpi.debug_layer=0x400000 acpi.debug_level=0x4
 			Enable all messages related to ACPI hardware:
 			    acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
 
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 4ba4664ce5c3..9cb9138f7a79 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -71,35 +71,50 @@ Look at the current lock statistics:
 
 # less /proc/lock_stat
 
-01 lock_stat version 0.2
+01 lock_stat version 0.3
 02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 03                               class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
 04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 05
-06 &inode->i_data.tree_lock-W:    15    21657    0.18    1093295.30    11547131054.85    58    10415    0.16    87.51    6387.60
-07 &inode->i_data.tree_lock-R:     0        0    0.00          0.00              0.00 23302   231198    0.25     8.45   98023.38
-08 --------------------------
-09 &inode->i_data.tree_lock    0    [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
-10
-11 ...............................................................................................................................................................................................
-12
-13 dcache_lock:    1037    1161    0.38    45.32    774.51    6611    243371    0.15    306.48    77387.24
-14 -----------
-15 dcache_lock    180    [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
-16 dcache_lock    165    [<ffffffff802c002a>] d_alloc+0x15a/0x210
-17 dcache_lock     33    [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
-18 dcache_lock      1    [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
+06 &mm->mmap_sem-W:    233    538    18446744073708    22924.27    607243.51    1342     45806    1.71      8595.89    1180582.34
+07 &mm->mmap_sem-R:    205    587    18446744073708    28403.36    731975.00    1940    412426    0.58    187825.45    6307502.88
+08 ---------------
+09 &mm->mmap_sem    487    [<ffffffff8053491f>] do_page_fault+0x466/0x928
+10 &mm->mmap_sem    179    [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+11 &mm->mmap_sem    279    [<ffffffff80210a57>] sys_mmap+0x75/0xce
+12 &mm->mmap_sem     76    [<ffffffff802a490b>] sys_munmap+0x32/0x59
+13 ---------------
+14 &mm->mmap_sem    270    [<ffffffff80210a57>] sys_mmap+0x75/0xce
+15 &mm->mmap_sem    431    [<ffffffff8053491f>] do_page_fault+0x466/0x928
+16 &mm->mmap_sem    138    [<ffffffff802a490b>] sys_munmap+0x32/0x59
+17 &mm->mmap_sem    145    [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+18
+19 ...............................................................................................................................................................................................
+20
+21 dcache_lock:    621    623    0.52    118.26    1053.02    6745    91930    0.29    316.29    118423.41
+22 -----------
+23 dcache_lock    179    [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+24 dcache_lock    113    [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+25 dcache_lock     99    [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+26 dcache_lock    104    [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
+27 -----------
+28 dcache_lock    192    [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+29 dcache_lock     98    [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+30 dcache_lock     72    [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+31 dcache_lock    112    [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
 
 This excerpt shows the first two lock class statistics. Line 01 shows the
 output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-10 and 13-18 show the actual
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
 statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 14) from the contention points.
+short separator (line 08, 13) from the contention points.
 
-The first lock (05-10) is a read/write lock, and shows two lines above the
+The first lock (05-18) is a read/write lock, and shows two lines above the
 short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol.
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
 
+The integer part of the time values is in us.
 
 View the top contending locks:
 
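
The excerpt stops at "View the top contending locks:".  One hedged way to pull
out just the per-class statistic lines, which all carry a "name:" token while
the contention-point lines do not, is sketched below; only the /proc/lock_stat
path comes from the document, the rest is illustrative.

/* Illustrative sketch, roughly the equivalent of "grep : /proc/lock_stat". */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[1024];
	FILE *f = fopen("/proc/lock_stat", "r");

	if (!f) {
		perror("/proc/lock_stat");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strchr(line, ':'))	/* class lines contain "name:" */
			fputs(line, stdout);
	fclose(f);
	return 0;
}
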
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 3cd2ad958176..394d7d378dc7 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -1063,6 +1063,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
 	STAC9227/9228/9229/927x
 	  ref		Reference board
+	  ref-no-jd	Reference board without HP/Mic jack detection
 	  3stack	D965 3stack
 	  5stack	D965 5stack + SPDIF
 	  dell-3stack	Dell Dimension E520
@@ -1076,6 +1077,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
 	STAC92HD73*
 	  ref		Reference board
+	  no-jd		BIOS setup but without jack-detection
 	  dell-m6-amic	Dell desktops/laptops with analog mics
 	  dell-m6-dmic	Dell desktops/laptops with digital mics
 	  dell-m6	Dell desktops/laptops with both type of mics
diff --git a/MAINTAINERS b/MAINTAINERS
index c42a567e010c..fbc8fa58d56d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4529,7 +4529,7 @@ S: Maintained
 USB VIDEO CLASS
 P:	Laurent Pinchart
 M:	laurent.pinchart@skynet.be
-L:	linux-uvc-devel@lists.berlios.de
+L:	linux-uvc-devel@lists.berlios.de (subscribers-only)
 L:	video4linux-list@redhat.com
 W:	http://linux-uvc.berlios.de
 S:	Maintained
diff --git a/Makefile b/Makefile
index 4c8d79710b84..71e98e9e6acd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 28
-EXTRAVERSION = -rc9
+EXTRAVERSION =
 NAME = Erotic Pickled Herring
 
 # *DOCUMENTATION*
diff --git a/arch/mips/include/asm/byteorder.h b/arch/mips/include/asm/byteorder.h
index 2988d29a0867..33790b9e0cc0 100644
--- a/arch/mips/include/asm/byteorder.h
+++ b/arch/mips/include/asm/byteorder.h
@@ -50,9 +50,8 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
 {
 	__asm__(
-	"	dsbh	%0, %1 \n"
-	"	dshd	%0, %0 \n"
-	"	drotr	%0, %0, 32 \n"
+	"	dsbh	%0, %1\n"
+	"	dshd	%0, %0"
 	: "=r" (x)
 	: "r" (x));
 
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index a8eac1697b3d..d58f128aa747 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -232,7 +232,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
  */
 #ifdef __MIPSEB__
 #define ELF_DATA	ELFDATA2MSB
-#elif __MIPSEL__
+#elif defined(__MIPSEL__)
 #define ELF_DATA	ELFDATA2LSB
 #endif
 #define ELF_ARCH	EM_MIPS
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index b72ec66db699..1f6fd4fc05b9 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -44,9 +44,12 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
-#ifdef CONFIG_SMP
+#if 1 || defined(CONFIG_SMP)
 	flush_tlb_all();
 #else
+	/* FIXME: currently broken, causing space id and protection ids
+	 * to go out of sync, resulting in faults on userspace accesses.
+	 */
 	if (mm) {
 		if (mm->context != 0)
 			free_sid(mm->context);
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index f4e55be2eea9..afad9f5ac0ac 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -208,6 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 		break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -227,6 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 	/* Check to see if we need to or have stopped logging */
 	if (fatal || !logging_enabled) {
 		logging_enabled = 0;
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -249,11 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 	else
 		rtas_log_start += 1;
 
+	WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 	spin_unlock_irqrestore(&rtasd_log_lock, s);
 	wake_up_interruptible(&rtas_log_wait);
 	break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
index 753346e2cdfd..ae5f94d6317d 100644
--- a/arch/um/include/asm/system.h
+++ b/arch/um/include/asm/system.h
@@ -11,21 +11,21 @@ extern int get_signals(void);
 extern void block_signals(void);
 extern void unblock_signals(void);
 
-#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
 				     (flags) = get_signals(); } while(0)
-#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
 				      set_signals(flags); } while(0)
 
-#define local_irq_save(flags) do { local_save_flags(flags); \
-				   local_irq_disable(); } while(0)
+#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
+				       raw_local_irq_disable(); } while(0)
 
-#define local_irq_enable() unblock_signals()
-#define local_irq_disable() block_signals()
+#define raw_local_irq_enable() unblock_signals()
+#define raw_local_irq_disable() block_signals()
 
 #define irqs_disabled()			\
 ({					\
 	unsigned long flags;		\
-	local_save_flags(flags);	\
+	raw_local_save_flags(flags);	\
 	(flags == 0);			\
 })
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..8e99073b9e0f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -520,6 +520,7 @@ config X86_PTRACE_BTS
 	bool "Branch Trace Store"
 	default y
 	depends on X86_DEBUGCTLMSR
+	depends on BROKEN
 	help
 	  This adds a ptrace interface to the hardware's branch trace store.
 
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 097794ff6b79..3b43a65894c4 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -65,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 		return dma_ops;
 	else
 		return dev->archdata.dma_ops;
-#endif /* _ASM_X86_DMA_MAPPING_H */
+#endif
 }
 
 /* Make sure we keep the same behaviour */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 0b500c5b6446..35276ec5925b 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -7,8 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
-extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf193..50ac542c9382 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -82,6 +82,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 static inline void early_quirks(void) { }
 #endif
 
+extern void pci_iommu_alloc(void);
+
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index d02d936840a3..4da207982777 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
 			       int reg, int len, u32 value);
 
 extern void dma32_reserve_bootmem(void);
-extern void pci_iommu_alloc(void);
 
 /* The PCI address space does equal the physical memory
  * address space. The networking and block device layers use
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
 	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
+	might_fault();							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
 	int __ret_pu;							\
 	__typeof__(*(ptr)) __pu_val;					\
 	__chk_user_ptr(ptr);						\
+	might_fault();							\
 	__pu_val = x;							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	return __copy_to_user_inatomic(to, from, n);
 }
 
 static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f8cfd00db450..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -71,6 +73,8 @@ static __always_inline __must_check
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
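
The pattern established by these might_fault() additions generalizes: any
helper that can touch user memory should carry the annotation so that debug
builds can flag callers running in atomic context.  A hedged sketch follows
(struct req and fetch_req are made-up names; kernel context and linux/uaccess.h
are assumed):

/* Hypothetical helper illustrating the annotation pattern. */
struct req {
	unsigned long a, b;	/* made-up payload */
};

static inline long fetch_req(struct req *dst, const struct req __user *src)
{
	might_fault();	/* makes the may-sleep/may-fault contract explicit */
	return copy_from_user(dst, src, sizeof(*dst)) ? -EFAULT : 0;
}
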
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828e..a9c656f2d661 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -105,6 +105,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 
+obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -118,7 +120,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
         obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
-        obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a7b6dec6fc3f..0a60d60ed036 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -235,8 +235,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
 	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-	if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
-		printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+	if (unlikely(i == EXIT_LOOP_COUNT))
+		panic("AMD IOMMU: Completion wait loop failed\n");
+
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 30ae2701b3df..c6cc22815d35 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -427,6 +427,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 		    &entry, sizeof(entry));
 
+	/* set head and tail to zero manually */
+	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 
 	return cmd_buf;
@@ -1074,7 +1078,8 @@ int __init amd_iommu_init(void)
 		goto free;
 
 	/* IOMMU rlookup table - find the IOMMU for a specific device */
-	amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
+	amd_iommu_rlookup_table = (void *)__get_free_pages(
+			GFP_KERNEL | __GFP_ZERO,
 			get_order(rlookup_table_size));
 	if (amd_iommu_rlookup_table == NULL)
 		goto free;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 4b031a4ac856..1c838032fd37 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
  */
 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 {
-	static cpumask_t mce_cpus = CPU_MASK_NONE;
-
 	mce_cpu_quirks(c);
 
 	if (mce_dont_init ||
-	    cpu_test_and_set(smp_processor_id(), mce_cpus) ||
 	    !mce_available(c))
 		return;
 
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 82fb2809ce32..c4b5b24e0217 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = {
 	.name = "microcode",
 };
 
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	mutex_lock(&microcode_mutex);
 	microcode_ops->microcode_fini_cpu(cpu);
 	uci->valid = 0;
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+	mutex_lock(&microcode_mutex);
+	__microcode_fini_cpu(cpu);
 	mutex_unlock(&microcode_mutex);
 }
 
@@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu)
 	 * to this cpu (a bit of paranoia):
 	 */
 	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
-		microcode_fini_cpu(cpu);
+		__microcode_fini_cpu(cpu);
+		printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
+		       cpu);
 		return -1;
 	}
 
-	if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
-		microcode_fini_cpu(cpu);
+	if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
+		__microcode_fini_cpu(cpu);
+		printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
+		       cpu);
 		/* Should we look for a new ucode here? */
 		return 1;
 	}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 622dc4a21784..a8e62792d171 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock);
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+	unsigned long flags;
 	unsigned int val[2];
 
 	memset(csig, 0, sizeof(*csig));
@@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
+	/* serialize access to the physical write to MSR 0x79 */
+	spin_lock_irqsave(&microcode_update_lock, flags);
+
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	/* see notes above for revision 1.07. Apparent chip bug */
 	sync_core();
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
+	spin_unlock_irqrestore(&microcode_update_lock, flags);
+
 	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
 		 csig->sig, csig->pf, csig->rev);
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 192624820217..00e07447a5bd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -105,11 +105,15 @@ static void __init dma32_free_bootmem(void)
 	dma32_bootmem_ptr = NULL;
 	dma32_bootmem_size = 0;
 }
+#endif
 
 void __init pci_iommu_alloc(void)
 {
+#ifdef CONFIG_X86_64
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
+#endif
+
 	/*
 	 * The order of these functions is important for
 	 * fall-back/fail-over reasons
@@ -125,15 +129,6 @@ void __init pci_iommu_alloc(void)
 	pci_swiotlb_init();
 }
 
-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
-{
-	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
-	return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_nr_pages);
-#endif
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag)
 {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba7ad83e20a8..a35eaa379ff6 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -745,10 +745,8 @@ void __init gart_iommu_init(void)
 	unsigned long scratch;
 	long i;
 
-	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
-		printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
+	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
 		return;
-	}
 
 #ifndef CONFIG_AGP_AMD64
 	no_agp = 1;
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d111abb..242c3440687f 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
 #include <linux/pci.h>
 #include <linux/cache.h>
 #include <linux/module.h>
+#include <linux/swiotlb.h>
+#include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/iommu.h>
@@ -11,6 +13,31 @@
11 13
12int swiotlb __read_mostly; 14int swiotlb __read_mostly;
13 15
16void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
17{
18 return alloc_bootmem_low_pages(size);
19}
20
21void *swiotlb_alloc(unsigned order, unsigned long nslabs)
22{
23 return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
24}
25
26dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
27{
28 return paddr;
29}
30
31phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
32{
33 return baddr;
34}
35
36int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
37{
38 return 0;
39}
40
14static dma_addr_t 41static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, 42swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction) 43 int direction)
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = {
50void __init pci_swiotlb_init(void) 77void __init pci_swiotlb_init(void)
51{ 78{
52 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 79 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
80#ifdef CONFIG_X86_64
53 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) 81 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
54 swiotlb = 1; 82 swiotlb = 1;
83#endif
55 if (swiotlb_force) 84 if (swiotlb_force)
56 swiotlb = 1; 85 swiotlb = 1;
57 if (swiotlb) { 86 if (swiotlb) {
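
Note: swiotlb_arch_range_needs_mapping() above is declared __weak, so the
generic definition is only a default that an architecture may override at
link time. A small sketch of that pattern using the GCC/Clang weak attribute;
the function name here is illustrative, not the kernel symbol:

    #include <stdio.h>
    #include <stddef.h>

    /* default implementation: a strong definition elsewhere wins at link time */
    __attribute__((weak)) int range_needs_mapping(void *ptr, size_t size)
    {
            return 0;       /* no extra mapping needed by default */
    }

    int main(void)
    {
            printf("needs mapping: %d\n", range_needs_mapping(NULL, 4096));
            return 0;
    }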
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 #define __do_strncpy_from_user(dst, src, count, res)	\
 do {							\
 	int __d0, __d1, __d2;				\
-	might_sleep();					\
+	might_fault();					\
 	__asm__ __volatile__(				\
 		"	testl %1,%1\n"			\
 		"	jz 2f\n"			\
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 #define __do_clear_user(addr,size)			\
 do {							\
 	int __d0;					\
-	might_sleep();					\
+	might_fault();					\
 	__asm__ __volatile__(				\
 		"0:	rep; stosl\n"			\
 		"	movl %2,%0\n"			\
@@ -155,7 +155,7 @@ do { \
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
 
-	might_sleep();
+	might_fault();
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
 #define __do_strncpy_from_user(dst,src,count,res)	\
 do {							\
 	long __d0, __d1, __d2;				\
-	might_sleep();					\
+	might_fault();					\
 	__asm__ __volatile__(				\
 		"	testq %1,%1\n"			\
 		"	jz 2f\n"			\
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
-	might_sleep();
+	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
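
Note: the might_sleep() -> might_fault() swaps above narrow the debug
assertion: a user-space access only sleeps when it can actually take a page
fault, so the "may sleep" check should be skipped when faults are disabled.
A rough userspace illustration of that idea; every name below is a stand-in,
not the kernel implementation:

    #include <assert.h>
    #include <stdio.h>

    static int pagefault_disabled;  /* faults fixed up, never sleep */
    static int in_atomic_context;   /* sleeping here would be a bug */

    static void might_fault_sketch(void)
    {
            if (pagefault_disabled)
                    return;                 /* no fault possible: no warning */
            assert(!in_atomic_context);     /* the old might_sleep() check */
    }

    int main(void)
    {
            pagefault_disabled = 1;
            in_atomic_context = 1;
            might_fault_sketch();   /* legal: atomic access with faults off */
            puts("ok");
            return 0;
    }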
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..2b4b14fc0c04 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
@@ -971,6 +972,8 @@ void __init mem_init(void)
 
 	start_periodic_check_for_corruption();
 
+	pci_iommu_alloc();
+
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 7edf6d913c13..765fd1c56cd6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -688,14 +688,6 @@ void __init acpi_early_init(void)
 	if (acpi_disabled)
 		return;
 
-	/*
-	 * ACPI CA initializes acpi_dbg_level to non-zero, which means
-	 * we get debug output merely by turning on CONFIG_ACPI_DEBUG.
-	 * Turn it off so we don't get output unless the user specifies
-	 * acpi.debug_level.
-	 */
-	acpi_dbg_level = 0;
-
 	printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);
 
 	/* enable workarounds, unless strict ACPI spec. compliance */
diff --git a/drivers/acpi/utilities/utglobal.c b/drivers/acpi/utilities/utglobal.c
index 670551b95e56..17ed5ac840f7 100644
--- a/drivers/acpi/utilities/utglobal.c
+++ b/drivers/acpi/utilities/utglobal.c
@@ -64,7 +64,7 @@ u32 acpi_dbg_level = ACPI_DEBUG_DEFAULT;
 
 /* Debug switch - layer (component) mask */
 
-u32 acpi_dbg_layer = ACPI_COMPONENT_DEFAULT | ACPI_ALL_DRIVERS;
+u32 acpi_dbg_layer = 0;
 u32 acpi_gbl_nesting_level = 0;
 
 /* Debugger globals */
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9364dc554257..9f7c543cc04b 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1693,6 +1693,11 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
 	for (i = 0; i <= h->highest_lun; i++) {
 		int j;
 		drv_found = 0;
+
+		/* skip holes in the array from already deleted drives */
+		if (h->drv[i].raid_level == -1)
+			continue;
+
 		for (j = 0; j < num_luns; j++) {
 			memcpy(&lunid, &ld_buff->LUN[j][0], 4);
 			lunid = le32_to_cpu(lunid);
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 5fcd3d89c75d..4041e9143283 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -394,6 +394,12 @@ static void edac_device_workq_function(struct work_struct *work_req)
 
 	mutex_lock(&device_ctls_mutex);
 
+	/* If we are being removed, bail out immediately */
+	if (edac_dev->op_state == OP_OFFLINE) {
+		mutex_unlock(&device_ctls_mutex);
+		return;
+	}
+
 	/* Only poll controllers that are running polled and have a check */
 	if ((edac_dev->op_state == OP_RUNNING_POLL) &&
 		(edac_dev->edac_check != NULL)) {
@@ -585,14 +591,14 @@ struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
 	/* mark this instance as OFFLINE */
 	edac_dev->op_state = OP_OFFLINE;
 
-	/* clear workq processing on this instance */
-	edac_device_workq_teardown(edac_dev);
-
 	/* deregister from global list */
 	del_edac_device_from_global_list(edac_dev);
 
 	mutex_unlock(&device_ctls_mutex);
 
+	/* clear workq processing on this instance */
+	edac_device_workq_teardown(edac_dev);
+
 	/* Tear down the sysfs entries for this instance */
 	edac_device_remove_sysfs(edac_dev);
 
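
Note: the two hunks above cooperate: removal marks the instance OP_OFFLINE
under the mutex first, the poller re-checks the state after taking the same
mutex, and the workqueue teardown moves outside the lock. A userspace sketch
of that ordering with stand-in names:

    #include <pthread.h>
    #include <stdio.h>

    enum op_state { OP_RUNNING_POLL, OP_OFFLINE };

    static pthread_mutex_t ctls_mutex = PTHREAD_MUTEX_INITIALIZER;
    static enum op_state state = OP_RUNNING_POLL;

    static void poll_worker(void)
    {
            pthread_mutex_lock(&ctls_mutex);
            if (state == OP_OFFLINE) {              /* being removed: bail */
                    pthread_mutex_unlock(&ctls_mutex);
                    return;
            }
            /* ... check the device ... */
            pthread_mutex_unlock(&ctls_mutex);
    }

    static void del_device(void)
    {
            pthread_mutex_lock(&ctls_mutex);
            state = OP_OFFLINE;     /* the worker sees this on its next run */
            pthread_mutex_unlock(&ctls_mutex);
            /* tear down the work only after dropping the lock, so a worker
             * that is already running can finish instead of deadlocking */
    }

    int main(void) { del_device(); poll_worker(); puts("done"); return 0; }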
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 553dd4bc3075..afa8a12cd009 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -717,7 +717,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = dev->pci_device;
 		break;
 	case I915_PARAM_HAS_GEM:
-		value = 1;
+		value = dev_priv->has_gem;
 		break;
 	default:
 		DRM_ERROR("Unknown parameter %d\n", param->param);
@@ -830,6 +830,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	dev_priv->regs = ioremap(base, size);
 
+#ifdef CONFIG_HIGHMEM64G
+	/* don't enable GEM on PAE - needs agp + set_memory_* interface fixes */
+	dev_priv->has_gem = 0;
+#else
+	/* enable GEM by default */
+	dev_priv->has_gem = 1;
+#endif
+
 	i915_gem_load(dev);
 
 	/* Init HWS */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index adc972cc6bfc..b3cc4731aa7c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -106,6 +106,8 @@ struct intel_opregion {
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
+	int has_gem;
+
 	void __iomem *regs;
 	drm_local_map_t *sarea;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad672d854828..24fe8c10b4b2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2309,7 +2309,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	}
 
 	obj_priv = obj->driver_private;
-	args->busy = obj_priv->active;
+	/* Don't count being on the flushing list against the object being
+	 * done. Otherwise, a buffer left on the flushing list but not getting
+	 * flushed (because nobody's flushing that domain) won't ever return
+	 * unbusy and get reused by libdrm's bo cache. The other expected
+	 * consumer of this interface, OpenGL's occlusion queries, also specs
+	 * that the objects get unbusy "eventually" without any interference.
+	 */
+	args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
 
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 99be11418ac2..8289e16419a8 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -44,7 +44,7 @@ void radeon_irq_set_state(struct drm_device *dev, u32 mask, int state)
 	else
 		dev_priv->irq_enable_reg &= ~mask;
 
-	if (!dev->irq_enabled)
+	if (dev->irq_enabled)
 		RADEON_WRITE(RADEON_GEN_INT_CNTL, dev_priv->irq_enable_reg);
 }
 
@@ -57,7 +57,7 @@ static void r500_vbl_irq_set_state(struct drm_device *dev, u32 mask, int state)
 	else
 		dev_priv->r500_disp_irq_reg &= ~mask;
 
-	if (!dev->irq_enabled)
+	if (dev->irq_enabled)
 		RADEON_WRITE(R500_DxMODE_INT_MASK, dev_priv->r500_disp_irq_reg);
 }
 
diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c
index 53f079cc00af..d8ede85fe17f 100644
--- a/drivers/ide/cs5530.c
+++ b/drivers/ide/cs5530.c
@@ -81,11 +81,12 @@ static u8 cs5530_udma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *mate = ide_get_pair_dev(drive);
-	u16 *mateid = mate->id;
+	u16 *mateid;
 	u8 mask = hwif->ultra_mask;
 
 	if (mate == NULL)
 		goto out;
+	mateid = mate->id;
 
 	if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
 		if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index f1a8758e3a99..ec7f766ef5e4 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -104,11 +104,12 @@ static u8 sc1200_udma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *mate = ide_get_pair_dev(drive);
-	u16 *mateid = mate->id;
+	u16 *mateid;
 	u8 mask = hwif->ultra_mask;
 
 	if (mate == NULL)
 		goto out;
+	mateid = mate->id;
 
 	if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
 		if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
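
Note: both hunks above fix the same bug: the initializer dereferenced `mate`
before the NULL check could run. A minimal reproduction of the corrected
shape, with simplified types:

    #include <stdio.h>
    #include <stddef.h>

    struct drive { unsigned short *id; };

    static unsigned short *get_mate_id(struct drive *mate)
    {
            unsigned short *mateid; /* was: = mate->id;  crash if mate == NULL */

            if (mate == NULL)
                    return NULL;
            mateid = mate->id;      /* dereference only after the check */
            return mateid;
    }

    int main(void)
    {
            printf("%p\n", (void *)get_mate_id(NULL));  /* safely NULL */
            return 0;
    }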
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ac89a5deaca2..ab7c8e4a61f9 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -208,16 +208,19 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
  */
 
 /* IO operations when bitmap is stored near all superblocks */
-static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
+static struct page *read_sb_page(mddev_t *mddev, long offset,
+				 struct page *page,
+				 unsigned long index, int size)
 {
 	/* choose a good rdev and read the page from there */
 
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
-	struct page *page = alloc_page(GFP_KERNEL);
 	sector_t target;
 
 	if (!page)
+		page = alloc_page(GFP_KERNEL);
+	if (!page)
 		return ERR_PTR(-ENOMEM);
 
 	rdev_for_each(rdev, tmp, mddev) {
@@ -227,7 +230,9 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
 
 		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 
-		if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
+		if (sync_page_io(rdev->bdev, target,
+				 roundup(size, bdev_hardsect_size(rdev->bdev)),
+				 page, READ)) {
 			page->index = index;
 			attach_page_buffers(page, NULL); /* so that free_buffer will
 							  * quietly no-op */
@@ -544,7 +549,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 
 		bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
 	} else {
-		bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
+		bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset,
+					       NULL,
+					       0, sizeof(bitmap_super_t));
 	}
 	if (IS_ERR(bitmap->sb_page)) {
 		err = PTR_ERR(bitmap->sb_page);
@@ -957,11 +964,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 				 */
 				page = bitmap->sb_page;
 				offset = sizeof(bitmap_super_t);
+				read_sb_page(bitmap->mddev, bitmap->offset,
+					     page,
+					     index, count);
 			} else if (file) {
 				page = read_page(file, index, bitmap, count);
 				offset = 0;
 			} else {
-				page = read_sb_page(bitmap->mddev, bitmap->offset, index);
+				page = read_sb_page(bitmap->mddev, bitmap->offset,
+						    NULL,
+						    index, count);
 				offset = 0;
 			}
 			if (IS_ERR(page)) { /* read error */
diff --git a/drivers/media/dvb/b2c2/Kconfig b/drivers/media/dvb/b2c2/Kconfig
index 73dc2ee9b014..b34301d56cd2 100644
--- a/drivers/media/dvb/b2c2/Kconfig
+++ b/drivers/media/dvb/b2c2/Kconfig
@@ -9,11 +9,11 @@ config DVB_B2C2_FLEXCOP
 	select DVB_STV0297 if !DVB_FE_CUSTOMISE
 	select DVB_BCM3510 if !DVB_FE_CUSTOMISE
 	select DVB_LGDT330X if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
 	select DVB_S5H1420 if !DVB_FE_CUSTOMISE
 	select DVB_TUNER_ITD1000 if !DVB_FE_CUSTOMISE
 	select DVB_ISL6421 if !DVB_FE_CUSTOMISE
 	select DVB_CX24123 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Support for the digital TV receiver chip made by B2C2 Inc. included in
 	  Technisats PCI cards and USB boxes.
diff --git a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
index a127a4175c40..5cded3708541 100644
--- a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
+++ b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
@@ -628,12 +628,14 @@ int flexcop_frontend_init(struct flexcop_device *fc)
 	}
 
 	/* try the cable dvb (stv0297) */
+	fc->fc_i2c_adap[0].no_base_addr = 1;
 	fc->fe = dvb_attach(stv0297_attach, &alps_tdee4_stv0297_config, i2c);
 	if (fc->fe != NULL) {
 		fc->dev_type = FC_CABLE;
 		fc->fe->ops.tuner_ops.set_params = alps_tdee4_stv0297_tuner_set_params;
 		goto fe_found;
 	}
+	fc->fc_i2c_adap[0].no_base_addr = 0;
 
 	/* try the sky v2.3 (vp310/Samsung tbdu18132(tsa5059)) */
 	fc->fe = dvb_attach(mt312_attach,
diff --git a/drivers/media/dvb/b2c2/flexcop-i2c.c b/drivers/media/dvb/b2c2/flexcop-i2c.c
index 43a112ec6d44..f13783f08f0f 100644
--- a/drivers/media/dvb/b2c2/flexcop-i2c.c
+++ b/drivers/media/dvb/b2c2/flexcop-i2c.c
@@ -47,9 +47,13 @@ static int flexcop_i2c_read4(struct flexcop_i2c_adapter *i2c,
 	int len = r100.tw_sm_c_100.total_bytes, /* remember total_bytes is buflen-1 */
 	    ret;
 
-	r100.tw_sm_c_100.no_base_addr_ack_error = i2c->no_base_addr;
 	ret = flexcop_i2c_operation(i2c->fc, &r100);
 	if (ret != 0) {
+		deb_i2c("Retrying operation\n");
+		r100.tw_sm_c_100.no_base_addr_ack_error = i2c->no_base_addr;
+		ret = flexcop_i2c_operation(i2c->fc, &r100);
+	}
+	if (ret != 0) {
 		deb_i2c("read failed. %d\n", ret);
 		return ret;
 	}
diff --git a/drivers/media/dvb/bt8xx/Kconfig b/drivers/media/dvb/bt8xx/Kconfig
index 7e9c090fc04e..27edb0ece587 100644
--- a/drivers/media/dvb/bt8xx/Kconfig
+++ b/drivers/media/dvb/bt8xx/Kconfig
@@ -8,7 +8,7 @@ config DVB_BT8XX
 	select DVB_OR51211 if !DVB_FE_CUSTOMISE
 	select DVB_LGDT330X if !DVB_FE_CUSTOMISE
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Support for PCI cards based on the Bt8xx PCI bridge. Examples are
 	  the Nebula cards, the Pinnacle PCTV cards, the Twinhan DST cards,
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index 62b68c291d99..49f7b20c25d6 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -24,8 +24,8 @@ config DVB_USB_A800
 	tristate "AVerMedia AverTV DVB-T USB 2.0 (A800)"
 	depends on DVB_USB
 	select DVB_DIB3000MC
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
 	select DVB_PLL if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the AVerMedia AverTV DVB-T USB 2.0 (A800) receiver.
 
@@ -34,7 +34,7 @@ config DVB_USB_DIBUSB_MB
 	depends on DVB_USB
 	select DVB_PLL if !DVB_FE_CUSTOMISE
 	select DVB_DIB3000MB
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Support for USB 1.1 and 2.0 DVB-T receivers based on reference designs made by
 	  DiBcom (<http://www.dibcom.fr>) equipped with a DiB3000M-B demodulator.
@@ -55,7 +55,7 @@ config DVB_USB_DIBUSB_MC
 	tristate "DiBcom USB DVB-T devices (based on the DiB3000M-C/P) (see help for device list)"
 	depends on DVB_USB
 	select DVB_DIB3000MC
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Support for USB2.0 DVB-T receivers based on reference designs made by
 	  DiBcom (<http://www.dibcom.fr>) equipped with a DiB3000M-C/P demodulator.
@@ -73,11 +73,11 @@ config DVB_USB_DIB0700
 	select DVB_DIB7000M
 	select DVB_DIB3000MC
 	select DVB_S5H1411 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MT2266 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_XC2028 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_XC5000 if !DVB_FE_CUSTOMIZE
 	select DVB_TUNER_DIB0070
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_XC5000 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Support for USB2.0/1.1 DVB receivers based on the DiB0700 USB bridge. The
 	  USB bridge is also present in devices having the DiB7700 DVB-T-USB
@@ -95,7 +95,7 @@ config DVB_USB_UMT_010
 	depends on DVB_USB
 	select DVB_PLL if !DVB_FE_CUSTOMISE
 	select DVB_DIB3000MC
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the HanfTek UMT-010 USB2.0 stick-sized DVB-T receiver.
 
@@ -107,11 +107,11 @@ config DVB_USB_CXUSB
 	select DVB_LGDT330X if !DVB_FE_CUSTOMISE
 	select DVB_MT352 if !DVB_FE_CUSTOMISE
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_XC2028 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MXL5005S if !DVB_FE_CUSTOMISE
 	select DVB_DIB7000P if !DVB_FE_CUSTOMISE
 	select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_MXL5005S if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the Conexant USB2.0 hybrid reference design.
 	  Currently, only DVB and ATSC modes are supported, analog mode
@@ -124,9 +124,9 @@ config DVB_USB_M920X
 	tristate "Uli m920x DVB-T USB2.0 support"
 	depends on DVB_USB
 	select DVB_MT352 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_TDA827X if !DVB_FE_CUSTOMISE
 	select DVB_TDA1004X if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_TDA827X if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the MSI Mega Sky 580 USB2.0 DVB-T receiver.
 	  Currently, only devices with a product id of
@@ -137,7 +137,7 @@ config DVB_USB_GL861
 	tristate "Genesys Logic GL861 USB2.0 support"
 	depends on DVB_USB
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the MSI Megasky 580 (55801) DVB-T USB2.0
 	  receiver with USB ID 0db0:5581.
@@ -146,7 +146,7 @@ config DVB_USB_AU6610
 	tristate "Alcor Micro AU6610 USB2.0 support"
 	depends on DVB_USB
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the Sigmatek DVB-110 DVB-T USB2.0 receiver.
 
@@ -198,8 +198,8 @@ config DVB_USB_NOVA_T_USB2
 	tristate "Hauppauge WinTV-NOVA-T usb2 DVB-T USB2.0 support"
 	depends on DVB_USB
 	select DVB_DIB3000MC
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
 	select DVB_PLL if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the Hauppauge WinTV-NOVA-T usb2 DVB-T USB2.0 receiver.
 
@@ -235,8 +235,8 @@ config DVB_USB_OPERA1
 config DVB_USB_AF9005
 	tristate "Afatech AF9005 DVB-T USB1.1 support"
 	depends on DVB_USB && EXPERIMENTAL
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the Afatech AF9005 based DVB-T USB1.1 receiver
 	  and the TerraTec Cinergy T USB XE (Rev.1)
@@ -284,7 +284,7 @@ config DVB_USB_DTV5100
 	tristate "AME DTV-5100 USB2.0 DVB-T support"
 	depends on DVB_USB
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the AME DTV-5100 USB2.0 DVB-T receiver.
 
@@ -293,9 +293,9 @@ config DVB_USB_AF9015
 	depends on DVB_USB && EXPERIMENTAL
 	select DVB_AF9013
 	select DVB_PLL if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MT2060 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_QT1010 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_TDA18271 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MXL5005S if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_TDA18271 if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_MXL5005S if !MEDIA_TUNER_CUSTOMIZE
 	help
 	  Say Y here to support the Afatech AF9015 based DVB-T USB2.0 receiver
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index f28d3ae59e04..391732788911 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -446,13 +446,13 @@ static int stk7700ph_tuner_attach(struct dvb_usb_adapter *adap)
 		== NULL ? -ENODEV : 0;
 }
 
-#define DEFAULT_RC_INTERVAL 150
+#define DEFAULT_RC_INTERVAL 50
 
 static u8 rc_request[] = { REQUEST_POLL_RC, 0 };
 
 /* Number of keypresses to ignore before start repeating */
-#define RC_REPEAT_DELAY 2
-#define RC_REPEAT_DELAY_V1_20 5
+#define RC_REPEAT_DELAY 6
+#define RC_REPEAT_DELAY_V1_20 10
 
 
 
diff --git a/drivers/media/dvb/ttpci/Kconfig b/drivers/media/dvb/ttpci/Kconfig
index 867027ceab3e..401a04effc06 100644
--- a/drivers/media/dvb/ttpci/Kconfig
+++ b/drivers/media/dvb/ttpci/Kconfig
@@ -106,7 +106,7 @@ config DVB_BUDGET_CI
 	select DVB_TDA1004X if !DVB_FE_CUSTOMISE
 	select DVB_LNBP21 if !DVB_FE_CUSTOMISE
 	select DVB_TDA10023 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_TDA827X if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_TDA827X if !MEDIA_TUNER_CUSTOMIZE
 	select VIDEO_IR
 	help
 	  Support for simple SAA7146 based DVB cards
diff --git a/drivers/media/video/compat_ioctl32.c b/drivers/media/video/compat_ioctl32.c
index e6ca4012b5f0..0ea85a05e5c0 100644
--- a/drivers/media/video/compat_ioctl32.c
+++ b/drivers/media/video/compat_ioctl32.c
@@ -831,7 +831,7 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	int ret = -ENOIOCTLCMD;
 
-	if (!file->f_op->ioctl)
+	if (!file->f_op->ioctl && !file->f_op->unlocked_ioctl)
 		return ret;
 
 	switch (cmd) {
diff --git a/drivers/media/video/cx18/Kconfig b/drivers/media/video/cx18/Kconfig
index ef48565de7f1..8940b5387dec 100644
--- a/drivers/media/video/cx18/Kconfig
+++ b/drivers/media/video/cx18/Kconfig
@@ -9,7 +9,7 @@ config VIDEO_CX18
 	select VIDEO_CX2341X
 	select VIDEO_CS5345
 	select DVB_S5H1409 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MXL5005S if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_MXL5005S if !MEDIA_TUNER_CUSTOMIZE
 	---help---
 	  This is a video4linux driver for Conexant cx23418 based
 	  PCI combo video recorder devices.
diff --git a/drivers/media/video/cx23885/Kconfig b/drivers/media/video/cx23885/Kconfig
index 8c1b7fa47a41..00f1e2e8889e 100644
--- a/drivers/media/video/cx23885/Kconfig
+++ b/drivers/media/video/cx23885/Kconfig
@@ -11,16 +11,16 @@ config VIDEO_CX23885
 	select VIDEO_CX25840
 	select VIDEO_CX2341X
 	select DVB_DIB7000P if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_MT2131 if !DVB_FE_CUSTOMISE
 	select DVB_S5H1409 if !DVB_FE_CUSTOMISE
 	select DVB_S5H1411 if !DVB_FE_CUSTOMISE
 	select DVB_LGDT330X if !DVB_FE_CUSTOMISE
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
+	select DVB_TDA10048 if !DVB_FE_CUSTOMIZE
+	select MEDIA_TUNER_MT2131 if !MEDIA_TUNER_CUSTOMIZE
 	select MEDIA_TUNER_XC2028 if !DVB_FE_CUSTOMIZE
 	select MEDIA_TUNER_TDA8290 if !DVB_FE_CUSTOMIZE
 	select MEDIA_TUNER_TDA18271 if !DVB_FE_CUSTOMIZE
 	select MEDIA_TUNER_XC5000 if !DVB_FE_CUSTOMIZE
-	select DVB_TDA10048 if !DVB_FE_CUSTOMIZE
 	---help---
 	  This is a video4linux driver for Conexant 23885 based
 	  TV cards.
diff --git a/drivers/media/video/cx88/Kconfig b/drivers/media/video/cx88/Kconfig
index 0b9e5fac6239..b0f837588e01 100644
--- a/drivers/media/video/cx88/Kconfig
+++ b/drivers/media/video/cx88/Kconfig
@@ -56,12 +56,12 @@ config VIDEO_CX88_DVB
 	select DVB_NXT200X if !DVB_FE_CUSTOMISE
 	select DVB_CX24123 if !DVB_FE_CUSTOMISE
 	select DVB_ISL6421 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
 	select DVB_S5H1411 if !DVB_FE_CUSTOMISE
 	select DVB_CX24116 if !DVB_FE_CUSTOMISE
 	select DVB_STV0299 if !DVB_FE_CUSTOMISE
 	select DVB_STV0288 if !DVB_FE_CUSTOMISE
 	select DVB_STB6000 if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
 	---help---
 	  This adds support for DVB/ATSC cards based on the
 	  Conexant 2388x chip.
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 610f535a257c..4ea1f1e04897 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -549,10 +549,11 @@ static int em28xx_config(struct em28xx *dev)
 static void em28xx_config_i2c(struct em28xx *dev)
 {
 	struct v4l2_routing route;
+	int zero = 0;
 
 	route.input = INPUT(dev->ctl_input)->vmux;
 	route.output = 0;
-	em28xx_i2c_call_clients(dev, VIDIOC_INT_RESET, NULL);
+	em28xx_i2c_call_clients(dev, VIDIOC_INT_RESET, &zero);
 	em28xx_i2c_call_clients(dev, VIDIOC_INT_S_VIDEO_ROUTING, &route);
 	em28xx_i2c_call_clients(dev, VIDIOC_STREAMON, NULL);
 }
diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 748a87e82e44..02a6e9ef0337 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c
@@ -1264,10 +1264,10 @@ static int vidioc_s_jpegcomp(struct file *file, void *priv,
 	struct gspca_dev *gspca_dev = priv;
 	int ret;
 
-	if (mutex_lock_interruptible(&gspca_dev->usb_lock))
-		return -ERESTARTSYS;
 	if (!gspca_dev->sd_desc->set_jcomp)
 		return -EINVAL;
+	if (mutex_lock_interruptible(&gspca_dev->usb_lock))
+		return -ERESTARTSYS;
 	ret = gspca_dev->sd_desc->set_jcomp(gspca_dev, jpegcomp);
 	mutex_unlock(&gspca_dev->usb_lock);
 	return ret;
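
Note: the reordering above does the cheap capability check before taking the
interruptible lock, so unsupported requests fail fast and the error paths
never run with the lock held. A compact sketch with illustrative names:

    #include <pthread.h>
    #include <errno.h>
    #include <stdio.h>

    struct dev {
            pthread_mutex_t usb_lock;
            int (*set_jcomp)(struct dev *);
    };

    static int s_jpegcomp(struct dev *d)
    {
            int ret;

            if (!d->set_jcomp)                      /* check first... */
                    return -EINVAL;
            pthread_mutex_lock(&d->usb_lock);       /* ...then lock */
            ret = d->set_jcomp(d);
            pthread_mutex_unlock(&d->usb_lock);
            return ret;
    }

    static int do_set(struct dev *d) { (void)d; return 0; }

    int main(void)
    {
            struct dev d = { PTHREAD_MUTEX_INITIALIZER, do_set };
            printf("%d\n", s_jpegcomp(&d));
            return 0;
    }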
diff --git a/drivers/media/video/pvrusb2/Kconfig b/drivers/media/video/pvrusb2/Kconfig
index 19eb274c9cd0..854c2a885358 100644
--- a/drivers/media/video/pvrusb2/Kconfig
+++ b/drivers/media/video/pvrusb2/Kconfig
@@ -42,7 +42,7 @@ config VIDEO_PVRUSB2_DVB
 	select DVB_S5H1411 if !DVB_FE_CUSTOMISE
 	select DVB_TDA10048 if !DVB_FE_CUSTOMIZE
 	select MEDIA_TUNER_TDA18271 if !DVB_FE_CUSTOMIZE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
 	select MEDIA_TUNER_TDA8290 if !DVB_FE_CUSTOMIZE
 	---help---
 
diff --git a/drivers/media/video/saa7134/Kconfig b/drivers/media/video/saa7134/Kconfig
index 7021bbf5897b..fc2164e28e76 100644
--- a/drivers/media/video/saa7134/Kconfig
+++ b/drivers/media/video/saa7134/Kconfig
@@ -34,9 +34,9 @@ config VIDEO_SAA7134_DVB
 	select DVB_NXT200X if !DVB_FE_CUSTOMISE
 	select DVB_TDA10086 if !DVB_FE_CUSTOMISE
 	select DVB_TDA826X if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_TDA827X if !DVB_FE_CUSTOMISE
 	select DVB_ISL6421 if !DVB_FE_CUSTOMISE
-	select MEDIA_TUNER_SIMPLE if !DVB_FE_CUSTOMISE
+	select MEDIA_TUNER_TDA827X if !MEDIA_TUNER_CUSTOMIZE
+	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMIZE
 	---help---
 	  This adds support for DVB cards based on the
 	  Philips saa7134 chip.
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index d62fd4f6b52e..ee090413e598 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -2008,6 +2008,9 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 		return FAILED;
 	}
 
+	/* make sure we have no outstanding commands at this stage */
+	mptscsih_flush_running_cmds(hd);
+
 	ioc = hd->ioc;
 	printk(MYIOC_s_INFO_FMT "attempting host reset! (sc=%p)\n",
 		ioc->name, SCpnt);
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 7e857e938adb..714a23035de1 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -116,6 +116,7 @@ struct ppp {
 	unsigned long	last_xmit;	/* jiffies when last pkt sent 9c */
 	unsigned long	last_recv;	/* jiffies when last pkt rcvd a0 */
 	struct net_device *dev;		/* network interface device a4 */
+	int		closing;	/* is device closing down? a8 */
 #ifdef CONFIG_PPP_MULTILINK
 	int		nxchan;		/* next channel to send something on */
 	u32		nxseq;		/* next sequence number to send */
@@ -995,7 +996,7 @@ ppp_xmit_process(struct ppp *ppp)
 	struct sk_buff *skb;
 
 	ppp_xmit_lock(ppp);
-	if (ppp->dev) {
+	if (!ppp->closing) {
 		ppp_push(ppp);
 		while (!ppp->xmit_pending
 		       && (skb = skb_dequeue(&ppp->file.xq)))
@@ -1463,8 +1464,7 @@ static inline void
 ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
 {
 	ppp_recv_lock(ppp);
-	/* ppp->dev == 0 means interface is closing down */
-	if (ppp->dev)
+	if (!ppp->closing)
 		ppp_receive_frame(ppp, skb, pch);
 	else
 		kfree_skb(skb);
@@ -2498,18 +2498,16 @@ init_ppp_file(struct ppp_file *pf, int kind)
  */
 static void ppp_shutdown_interface(struct ppp *ppp)
 {
-	struct net_device *dev;
-
 	mutex_lock(&all_ppp_mutex);
-	ppp_lock(ppp);
-	dev = ppp->dev;
-	ppp->dev = NULL;
-	ppp_unlock(ppp);
 	/* This will call dev_close() for us. */
-	if (dev) {
-		unregister_netdev(dev);
-		free_netdev(dev);
-	}
+	ppp_lock(ppp);
+	if (!ppp->closing) {
+		ppp->closing = 1;
+		ppp_unlock(ppp);
+		unregister_netdev(ppp->dev);
+	} else
+		ppp_unlock(ppp);
+
 	cardmap_set(&all_ppp_units, ppp->file.index, NULL);
 	ppp->file.dead = 1;
 	ppp->owner = NULL;
@@ -2554,7 +2552,7 @@ static void ppp_destroy_interface(struct ppp *ppp)
 	if (ppp->xmit_pending)
 		kfree_skb(ppp->xmit_pending);
 
-	kfree(ppp);
+	free_netdev(ppp->dev);
 }
 
 /*
@@ -2616,7 +2614,7 @@ ppp_connect_channel(struct channel *pch, int unit)
 	if (pch->file.hdrlen > ppp->file.hdrlen)
 		ppp->file.hdrlen = pch->file.hdrlen;
 	hdrlen = pch->file.hdrlen + 2;	/* for protocol bytes */
-	if (ppp->dev && hdrlen > ppp->dev->hard_header_len)
+	if (hdrlen > ppp->dev->hard_header_len)
 		ppp->dev->hard_header_len = hdrlen;
 	list_add_tail(&pch->clist, &ppp->channels);
 	++ppp->n_channels;
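
Note: the series above replaces "ppp->dev == NULL" as the shutdown signal
with an explicit `closing` flag checked under the lock, so the device stays
valid until the final free. A userspace sketch of the flag's lifecycle; the
structure and helpers are stand-ins for the ppp code:

    #include <pthread.h>
    #include <stdio.h>

    struct ppp {
            pthread_mutex_t lock;
            int closing;            /* set once at shutdown, never cleared */
    };

    static void xmit(struct ppp *p)
    {
            pthread_mutex_lock(&p->lock);
            if (!p->closing)
                    puts("transmit frame");         /* ppp_push() etc. */
            pthread_mutex_unlock(&p->lock);
    }

    static void shutdown_iface(struct ppp *p)
    {
            pthread_mutex_lock(&p->lock);
            p->closing = 1;
            pthread_mutex_unlock(&p->lock);
            /* unregister is now safe; the final free happens with the
             * device itself, mirroring free_netdev() in the last hunk */
    }

    int main(void)
    {
            struct ppp p = { PTHREAD_MUTEX_INITIALIZER, 0 };
            shutdown_iface(&p);
            xmit(&p);               /* silently dropped: closing is set */
            return 0;
    }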
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index f9e244da30ae..9bcb6cbd5aa9 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -113,7 +113,7 @@ struct acpiphp_slot {
 
 	u8		device;		/* pci device# */
 
-	u32		sun;		/* ACPI _SUN (slot unique number) */
+	unsigned long long sun;		/* ACPI _SUN (slot unique number) */
 	u32		flags;		/* see below */
 };
 
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 95b536a23d25..43c10bd261b4 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -337,7 +337,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 	slot->hotplug_slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
 
 	acpiphp_slot->slot = slot;
-	snprintf(name, SLOT_NAME_SIZE, "%u", slot->acpi_slot->sun);
+	snprintf(name, SLOT_NAME_SIZE, "%llu", slot->acpi_slot->sun);
 
 	retval = pci_hp_register(slot->hotplug_slot,
 				 acpiphp_slot->bridge->pci_bus,
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 955aae4071f7..3affc6472e65 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -255,13 +255,13 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 
 	bridge->nr_slots++;
 
-	dbg("found ACPI PCI Hotplug slot %d at PCI %04x:%02x:%02x\n",
+	dbg("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
 	    slot->sun, pci_domain_nr(bridge->pci_bus),
 	    bridge->pci_bus->number, slot->device);
 	retval = acpiphp_register_hotplug_slot(slot);
 	if (retval) {
 		if (retval == -EBUSY)
-			warn("Slot %d already registered by another "
+			warn("Slot %llu already registered by another "
 				"hotplug driver\n", slot->sun);
 		else
 			warn("acpiphp_register_hotplug_slot failed "
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index c892daae74d6..633e743442ac 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -1402,10 +1402,6 @@ static int __init ibmphp_init(void)
1402 goto error; 1402 goto error;
1403 } 1403 }
1404 1404
1405 /* lock ourselves into memory with a module
1406 * count of -1 so that no one can unload us. */
1407 module_put(THIS_MODULE);
1408
1409exit: 1405exit:
1410 return rc; 1406 return rc;
1411 1407
@@ -1423,4 +1419,3 @@ static void __exit ibmphp_exit(void)
1423} 1419}
1424 1420
1425module_init(ibmphp_init); 1421module_init(ibmphp_init);
1426module_exit(ibmphp_exit);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4b23bc39b11e..39cf248d24e3 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -432,18 +432,19 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
 		goto err_out_release_ctlr;
 	}
 
+	/* Check if slot is occupied */
 	t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
-	t_slot->hpc_ops->get_adapter_status(t_slot, &value); /* Check if slot is occupied */
-	if (value && pciehp_force) {
-		rc = pciehp_enable_slot(t_slot);
-		if (rc)	/* -ENODEV: shouldn't happen, but deal with it */
-			value = 0;
-	}
-	if ((POWER_CTRL(ctrl)) && !value) {
-		rc = t_slot->hpc_ops->power_off_slot(t_slot); /* Power off slot if not occupied*/
-		if (rc)
-			goto err_out_free_ctrl_slot;
+	t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+	if (value) {
+		if (pciehp_force)
+			pciehp_enable_slot(t_slot);
+	} else {
+		/* Power off slot if not occupied */
+		if (POWER_CTRL(ctrl)) {
+			rc = t_slot->hpc_ops->power_off_slot(t_slot);
+			if (rc)
+				goto err_out_free_ctrl_slot;
+		}
 	}
 
 	return 0;
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index dfc63d01f20a..aac7006949f1 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -252,7 +252,7 @@ static void report_resume(struct pci_dev *dev, void *data)
252 252
253 if (!dev->driver || 253 if (!dev->driver ||
254 !dev->driver->err_handler || 254 !dev->driver->err_handler ||
255 !dev->driver->err_handler->slot_reset) 255 !dev->driver->err_handler->resume)
256 return; 256 return;
257 257
258 err_handler = dev->driver->err_handler; 258 err_handler = dev->driver->err_handler;
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 2cd77ab8fc66..054e05294af8 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -328,6 +328,13 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	int sr;
 	u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
 
+	/* The clock has an 8 bit wide bcd-coded register (they never learn)
+	 * for the year. tm_year is an offset from 1900 and we are interested
+	 * in the 2000-2099 range, so any value less than 100 is invalid.
+	 */
+	if (tm->tm_year < 100)
+		return -EINVAL;
+
 	regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
 	regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
 	regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 162cd927d94b..94acbeed4e7c 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -175,8 +175,8 @@ static struct aac_driver_ident aac_drivers[] = {
 	{ aac_rx_init, "percraid", "DELL    ", "PERCRAID        ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* PERC 3/Di (Boxster/PERC3DiB) */
 	{ aac_rx_init, "aacraid",  "ADAPTEC ", "catapult        ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* catapult */
 	{ aac_rx_init, "aacraid",  "ADAPTEC ", "tomcat          ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* tomcat */
-	{ aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2120S   ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2120S (Crusader) */
-	{ aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan) */
+	{ aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2120S   ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2120S (Crusader) */
+	{ aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec 2200S (Vulcan) */
 	{ aac_rx_init, "aacraid",  "ADAPTEC ", "Adaptec 2200S   ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Adaptec 2200S (Vulcan-2m) */
 	{ aac_rx_init, "aacraid",  "Legend  ", "Legend S220     ", 1, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S220 (Legend Crusader) */
 	{ aac_rx_init, "aacraid",  "Legend  ", "Legend S230     ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend S230 (Legend Vulcan) */
diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c
index 2a5b29d12172..e2dd6a45924a 100644
--- a/drivers/scsi/ibmvscsi/ibmvstgt.c
+++ b/drivers/scsi/ibmvscsi/ibmvstgt.c
@@ -864,21 +864,23 @@ static int ibmvstgt_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	INIT_WORK(&vport->crq_work, handle_crq);
 
-	err = crq_queue_create(&vport->crq_queue, target);
+	err = scsi_add_host(shost, target->dev);
 	if (err)
 		goto free_srp_target;
 
-	err = scsi_add_host(shost, target->dev);
+	err = scsi_tgt_alloc_queue(shost);
 	if (err)
-		goto destroy_queue;
+		goto remove_host;
 
-	err = scsi_tgt_alloc_queue(shost);
+	err = crq_queue_create(&vport->crq_queue, target);
 	if (err)
-		goto destroy_queue;
+		goto free_queue;
 
 	return 0;
-destroy_queue:
-	crq_queue_destroy(target);
+free_queue:
+	scsi_tgt_free_queue(shost);
+remove_host:
+	scsi_remove_host(shost);
 free_srp_target:
 	srp_target_free(target);
 put_host:
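
Note: the reordered probe above restores the usual goto-unwind invariant: set
up in one order, and on failure undo exactly the steps already completed, in
reverse. A skeleton of that shape with illustrative step names:

    #include <stdio.h>

    static int step_add_host(void)    { return 0; }
    static int step_alloc_queue(void) { return 0; }
    static int step_create_crq(void)  { return -1; }  /* simulate failure */
    static void undo_alloc_queue(void) { puts("free queue"); }
    static void undo_add_host(void)    { puts("remove host"); }

    static int probe(void)
    {
            int err;

            err = step_add_host();
            if (err)
                    goto out;
            err = step_alloc_queue();
            if (err)
                    goto remove_host;
            err = step_create_crq();
            if (err)
                    goto free_queue;
            return 0;

    free_queue:
            undo_alloc_queue();
    remove_host:
            undo_add_host();
    out:
            return err;
    }

    int main(void) { return probe() ? 1 : 0; }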
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 801c7cf54d2e..3fdee7370ccc 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -489,12 +489,6 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		if (!__kfifo_get(session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
-
-		if ((hdr->opcode == (ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE)) &&
-		     hdr->ttt == RESERVED_ITT) {
-			conn->ping_task = task;
-			conn->last_ping = jiffies;
-		}
 	}
 	/*
 	 * released in complete pdu for task we expect a response for, and
@@ -703,6 +697,11 @@ static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
 	task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
 	if (!task)
 		iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
+	else if (!rhdr) {
+		/* only track our nops */
+		conn->ping_task = task;
+		conn->last_ping = jiffies;
+	}
 }
 
 static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa45a1a66867..148d3af92aef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -648,8 +648,8 @@ static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
 	struct request *req = cmd->request;
 	unsigned long flags;
 
-	scsi_unprep_request(req);
 	spin_lock_irqsave(q->queue_lock, flags);
+	scsi_unprep_request(req);
 	blk_requeue_request(q, req);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 0d15b0eaf79a..5139c25ca962 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -356,7 +356,9 @@ int w1_reset_select_slave(struct w1_slave *sl)
356 w1_write_8(sl->master, W1_SKIP_ROM); 356 w1_write_8(sl->master, W1_SKIP_ROM);
357 else { 357 else {
358 u8 match[9] = {W1_MATCH_ROM, }; 358 u8 match[9] = {W1_MATCH_ROM, };
359 memcpy(&match[1], (u8 *)&sl->reg_num, 8); 359 u64 rn = le64_to_cpu(*((u64*)&sl->reg_num));
360
361 memcpy(&match[1], &rn, 8);
360 w1_write_block(sl->master, match, 9); 362 w1_write_block(sl->master, match, 9);
361 } 363 }
362 return 0; 364 return 0;
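
The two added lines fix a host-endianness dependency: the raw memcpy() of sl->reg_num put the 64-bit ROM id on the wire in whatever byte order the CPU uses. Converting through le64_to_cpu() first makes the transmitted bytes identical on little- and big-endian hosts (for a 64-bit value the swap is symmetric, so it has the same effect as the more conventional cpu_to_le64()). The general serialization pattern, as a sketch with an illustrative value:

    u64 id = 0x0011223344556677ULL;
    __le64 wire = cpu_to_le64(id);  /* identity on LE, byte swap on BE */
    u8 buf[9] = { W1_MATCH_ROM, };

    memcpy(&buf[1], &wire, 8);      /* same byte sequence on every host */
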
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 3031e3233dd6..2a983d49d19c 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -45,7 +45,7 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
45 struct v9fs_dentry *dent; 45 struct v9fs_dentry *dent;
46 46
47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", 47 P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
48 fid->fid, dentry->d_iname); 48 fid->fid, dentry->d_name.name);
49 49
50 dent = dentry->d_fsdata; 50 dent = dentry->d_fsdata;
51 if (!dent) { 51 if (!dent) {
@@ -79,7 +79,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
79 struct p9_fid *fid, *ret; 79 struct p9_fid *fid, *ret;
80 80
81 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 81 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
82 dentry->d_iname, dentry, uid, any); 82 dentry->d_name.name, dentry, uid, any);
83 dent = (struct v9fs_dentry *) dentry->d_fsdata; 83 dent = (struct v9fs_dentry *) dentry->d_fsdata;
84 ret = NULL; 84 ret = NULL;
85 if (dent) { 85 if (dent) {
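
These debug prints switch from dentry->d_iname to dentry->d_name.name. d_iname is only the small inline buffer embedded in struct dentry; a name longer than that buffer lives in a separately allocated string, and d_iname then holds nothing useful. d_name.name always points at the current name, whichever storage backs it, so it is the only field safe to print unconditionally:

    /* Sketch: d_name.name is valid for any name length. */
    const char *name = dentry->d_name.name;

    pr_debug("dentry: %s (%p)\n", name, dentry);
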
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 24eb01087b6d..332b5ff02fec 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -160,7 +160,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
160 v9ses->flags |= V9FS_ACCESS_ANY; 160 v9ses->flags |= V9FS_ACCESS_ANY;
161 else { 161 else {
162 v9ses->flags |= V9FS_ACCESS_SINGLE; 162 v9ses->flags |= V9FS_ACCESS_SINGLE;
163 v9ses->uid = simple_strtol(s, &e, 10); 163 v9ses->uid = simple_strtoul(s, &e, 10);
164 if (*e != '\0') 164 if (*e != '\0')
165 v9ses->uid = ~0; 165 v9ses->uid = ~0;
166 } 166 }
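
Switching from simple_strtol() to simple_strtoul() matches the type being parsed: uids are unsigned, and the signed helper misrepresents values above LONG_MAX on 32-bit systems. An illustrative comparison (the values are examples only):

    char *e;
    unsigned long uid = simple_strtoul("4294967294", &e, 10); /* 4294967294 */
    long wrong        = simple_strtol("4294967294", &e, 10);  /* -2 on 32-bit */
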
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f9534f18df0a..06dcc7c4f234 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -52,7 +52,8 @@
52 52
53static int v9fs_dentry_delete(struct dentry *dentry) 53static int v9fs_dentry_delete(struct dentry *dentry)
54{ 54{
55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 55 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
56 dentry);
56 57
57 return 1; 58 return 1;
58} 59}
@@ -69,7 +70,8 @@ static int v9fs_dentry_delete(struct dentry *dentry)
69static int v9fs_cached_dentry_delete(struct dentry *dentry) 70static int v9fs_cached_dentry_delete(struct dentry *dentry)
70{ 71{
71 struct inode *inode = dentry->d_inode; 72 struct inode *inode = dentry->d_inode;
72 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 73 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
74 dentry);
73 75
74 if(!inode) 76 if(!inode)
75 return 1; 77 return 1;
@@ -88,7 +90,8 @@ void v9fs_dentry_release(struct dentry *dentry)
88 struct v9fs_dentry *dent; 90 struct v9fs_dentry *dent;
89 struct p9_fid *temp, *current_fid; 91 struct p9_fid *temp, *current_fid;
90 92
91 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 93 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
94 dentry);
92 dent = dentry->d_fsdata; 95 dent = dentry->d_fsdata;
93 if (dent) { 96 if (dent) {
94 list_for_each_entry_safe(current_fid, temp, &dent->fidlist, 97 list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8314d3f43b71..2dfcf5487efe 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -963,7 +963,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
963 if (buflen > PATH_MAX) 963 if (buflen > PATH_MAX)
964 buflen = PATH_MAX; 964 buflen = PATH_MAX;
965 965
966 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 966 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
967 dentry);
967 968
968 retval = v9fs_readlink(dentry, link, buflen); 969 retval = v9fs_readlink(dentry, link, buflen);
969 970
@@ -1022,7 +1023,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1022{ 1023{
1023 char *s = nd_get_link(nd); 1024 char *s = nd_get_link(nd);
1024 1025
1025 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); 1026 P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name,
1027 IS_ERR(s) ? "<error>" : s);
1026 if (!IS_ERR(s)) 1028 if (!IS_ERR(s))
1027 __putname(s); 1029 __putname(s);
1028} 1030}
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 09d33c7740f0..db8852d8bcf7 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -172,7 +172,7 @@
172 172
173/* Defaults for debug_level, debug and normal */ 173/* Defaults for debug_level, debug and normal */
174 174
175#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) 175#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO)
176#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) 176#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT)
177#define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) 177#define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
178 178
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 029c8c06c151..0515e754449d 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -141,6 +141,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
141/* 141/*
142 * We need to show where it is safe to preempt execution of ACPICA 142 * We need to show where it is safe to preempt execution of ACPICA
143 */ 143 */
144#define ACPI_PREEMPTION_POINT() cond_resched() 144#define ACPI_PREEMPTION_POINT() \
145 do { \
146 if (!irqs_disabled()) \
147 cond_resched(); \
148 } while (0)
145 149
146#endif /* __ACLINUX_H__ */ 150#endif /* __ACLINUX_H__ */
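
cond_resched() may schedule, which is invalid with interrupts disabled; the added irqs_disabled() guard lets ACPICA keep sprinkling preemption points without first proving that no caller reaches them from an irq-off context. Usage is unchanged, e.g. inside a long-running interpreter loop (hypothetical helper shown):

    for (i = 0; i < n_ops; i++) {
            execute_one_aml_op(&ops[i]);    /* hypothetical */
            ACPI_PREEMPTION_POINT();        /* now a no-op when irqs are off */
    }
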
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..b8ba6941f587 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -33,15 +33,14 @@ struct bug_entry {
33 33
34#ifndef __WARN 34#ifndef __WARN
35#ifndef __ASSEMBLY__ 35#ifndef __ASSEMBLY__
36extern void warn_on_slowpath(const char *file, const int line);
37extern void warn_slowpath(const char *file, const int line, 36extern void warn_slowpath(const char *file, const int line,
38 const char *fmt, ...) __attribute__((format(printf, 3, 4))); 37 const char *fmt, ...) __attribute__((format(printf, 3, 4)));
39#define WANT_WARN_ON_SLOWPATH 38#define WANT_WARN_ON_SLOWPATH
40#endif 39#endif
41#define __WARN() warn_on_slowpath(__FILE__, __LINE__) 40#define __WARN() warn_slowpath(__FILE__, __LINE__, NULL)
42#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg) 41#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
43#else 42#else
44#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) 43#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
45#endif 44#endif
46 45
47#ifndef WARN_ON 46#ifndef WARN_ON
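
Dropping warn_on_slowpath() leaves a single slow-path helper: a NULL format means "no extra message". Both WARN_ON() and WARN() therefore funnel into the same function, roughly as in this sketch:

    WARN_ON(count < 0);                       /* -> warn_slowpath(file, line, NULL) */
    WARN(count < 0, "bad count %d\n", count); /* -> warn_slowpath(file, line, fmt, ...) */
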
diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index 777dbf695d44..27b1bcffe408 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,7 +2,6 @@
2#define _LINUX_BH_H 2#define _LINUX_BH_H
3 3
4extern void local_bh_disable(void); 4extern void local_bh_disable(void);
5extern void __local_bh_enable(void);
6extern void _local_bh_enable(void); 5extern void _local_bh_enable(void);
7extern void local_bh_enable(void); 6extern void local_bh_enable(void);
8extern void local_bh_enable_ip(unsigned long ip); 7extern void local_bh_enable_ip(unsigned long ip);
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 4aaa4afb1cb9..096476f1fb35 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -17,7 +17,7 @@ extern int debug_locks_off(void);
17({ \ 17({ \
18 int __ret = 0; \ 18 int __ret = 0; \
19 \ 19 \
20 if (unlikely(c)) { \ 20 if (!oops_in_progress && unlikely(c)) { \
21 if (debug_locks_off() && !debug_locks_silent) \ 21 if (debug_locks_off() && !debug_locks_silent) \
22 WARN_ON(1); \ 22 WARN_ON(1); \
23 __ret = 1; \ 23 __ret = 1; \
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 586ab56a3ec3..3bf5bb5a34f9 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -25,7 +25,8 @@ union ktime;
25#define FUTEX_WAKE_BITSET 10 25#define FUTEX_WAKE_BITSET 10
26 26
27#define FUTEX_PRIVATE_FLAG 128 27#define FUTEX_PRIVATE_FLAG 128
28#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG 28#define FUTEX_CLOCK_REALTIME 256
29#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
29 30
30#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) 31#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
31#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) 32#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
@@ -164,6 +165,8 @@ union futex_key {
164 } both; 165 } both;
165}; 166};
166 167
168#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
169
167#ifdef CONFIG_FUTEX 170#ifdef CONFIG_FUTEX
168extern void exit_robust_list(struct task_struct *curr); 171extern void exit_robust_list(struct task_struct *curr);
169extern void exit_pi_state_list(struct task_struct *curr); 172extern void exit_pi_state_list(struct task_struct *curr);
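
FUTEX_CLOCK_REALTIME is a second flag bit carried in the op word, so FUTEX_CMD_MASK must now strip both flags before the command is compared; FUTEX_KEY_INIT gives keys a well-defined empty state (ptr == NULL) so reference-dropping can safely no-op on never-filled keys. A sketch of how a do_futex()-style dispatcher decodes the word (variable names are illustrative):

    int cmd     = op & FUTEX_CMD_MASK;             /* both flag bits stripped */
    int fshared = !(op & FUTEX_PRIVATE_FLAG);      /* shared-mapping key lookup? */
    int clockrt = !!(op & FUTEX_CLOCK_REALTIME);   /* absolute CLOCK_REALTIME timeout */

    switch (cmd) {
    case FUTEX_WAIT:
            /* ... */
            break;
    }
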
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..9b70b9231693 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk)
118} 118}
119#endif 119#endif
120 120
121#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) 121#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU)
122extern void rcu_irq_enter(void); 122extern void rcu_irq_enter(void);
123extern void rcu_irq_exit(void); 123extern void rcu_irq_exit(void);
124extern void rcu_nmi_enter(void);
125extern void rcu_nmi_exit(void);
124#else 126#else
125# define rcu_irq_enter() do { } while (0) 127# define rcu_irq_enter() do { } while (0)
126# define rcu_irq_exit() do { } while (0) 128# define rcu_irq_exit() do { } while (0)
127#endif /* CONFIG_PREEMPT_RCU */ 129# define rcu_nmi_enter() do { } while (0)
130# define rcu_nmi_exit() do { } while (0)
131#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */
128 132
129/* 133/*
130 * It is safe to do non-atomic ops on ->hardirq_context, 134 * It is safe to do non-atomic ops on ->hardirq_context,
@@ -134,7 +138,6 @@ extern void rcu_irq_exit(void);
134 */ 138 */
135#define __irq_enter() \ 139#define __irq_enter() \
136 do { \ 140 do { \
137 rcu_irq_enter(); \
138 account_system_vtime(current); \ 141 account_system_vtime(current); \
139 add_preempt_count(HARDIRQ_OFFSET); \ 142 add_preempt_count(HARDIRQ_OFFSET); \
140 trace_hardirq_enter(); \ 143 trace_hardirq_enter(); \
@@ -153,7 +156,6 @@ extern void irq_enter(void);
153 trace_hardirq_exit(); \ 156 trace_hardirq_exit(); \
154 account_system_vtime(current); \ 157 account_system_vtime(current); \
155 sub_preempt_count(HARDIRQ_OFFSET); \ 158 sub_preempt_count(HARDIRQ_OFFSET); \
156 rcu_irq_exit(); \
157 } while (0) 159 } while (0)
158 160
159/* 161/*
@@ -161,7 +163,7 @@ extern void irq_enter(void);
161 */ 163 */
162extern void irq_exit(void); 164extern void irq_exit(void);
163 165
164#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) 166#define nmi_enter() do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0)
165#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) 167#define nmi_exit() do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0)
166 168
167#endif /* LINUX_HARDIRQ_H */ 169#endif /* LINUX_HARDIRQ_H */
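
rcu_irq_enter()/rcu_irq_exit() move out of the low-level __irq_enter()/__irq_exit() macros — nmi_enter()/nmi_exit() are built on those macros, and NMIs need their own dynticks accounting — while the new rcu_nmi_enter()/rcu_nmi_exit() hooks let tree RCU track NMI nesting separately from irq nesting. The bracketing nests in strict reverse order, as in this sketch of a handler:

    void example_nmi_handler(void)
    {
            nmi_enter();    /* lockdep_off(); rcu_nmi_enter(); __irq_enter(); */
            /* ... no locks, no sleeping ... */
            nmi_exit();     /* __irq_exit(); rcu_nmi_exit(); lockdep_on(); */
    }
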
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index dc7e0d0a6474..269df5a17b30 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -141,6 +141,15 @@ extern int _cond_resched(void);
141 (__x < 0) ? -__x : __x; \ 141 (__x < 0) ? -__x : __x; \
142 }) 142 })
143 143
144#ifdef CONFIG_PROVE_LOCKING
145void might_fault(void);
146#else
147static inline void might_fault(void)
148{
149 might_sleep();
150}
151#endif
152
144extern struct atomic_notifier_head panic_notifier_list; 153extern struct atomic_notifier_head panic_notifier_list;
145extern long (*panic_blink)(long time); 154extern long (*panic_blink)(long time);
146NORET_TYPE void panic(const char * fmt, ...) 155NORET_TYPE void panic(const char * fmt, ...)
@@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr);
188extern int core_kernel_text(unsigned long addr); 197extern int core_kernel_text(unsigned long addr);
189extern int __kernel_text_address(unsigned long addr); 198extern int __kernel_text_address(unsigned long addr);
190extern int kernel_text_address(unsigned long addr); 199extern int kernel_text_address(unsigned long addr);
200extern int func_ptr_is_kernel_text(void *ptr);
201
191struct pid; 202struct pid;
192extern struct pid *session_of_pgrp(struct pid *pgrp); 203extern struct pid *session_of_pgrp(struct pid *pgrp);
193 204
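
might_fault() annotates code that can take a page fault: without CONFIG_PROVE_LOCKING it degenerates to might_sleep(), and with it the out-of-line version can additionally exercise lock-ordering checks (e.g. against mmap_sem) on every call, even on runs where no fault actually occurs. Typical placement is at the top of user-copy primitives; a sketch with a hypothetical wrapper:

    static inline unsigned long
    copy_to_user_annotated(void __user *to, const void *from, unsigned long n)
    {
            might_fault();  /* may sleep; may take mm locks on a fault */
            return copy_to_user(to, from, n);
    }
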
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 29aec6e10020..37a0361f4685 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -73,6 +73,8 @@ struct lock_class_key {
73 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; 73 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
74}; 74};
75 75
76#define LOCKSTAT_POINTS 4
77
76/* 78/*
77 * The lock-class itself: 79 * The lock-class itself:
78 */ 80 */
@@ -119,7 +121,8 @@ struct lock_class {
119 int name_version; 121 int name_version;
120 122
121#ifdef CONFIG_LOCK_STAT 123#ifdef CONFIG_LOCK_STAT
122 unsigned long contention_point[4]; 124 unsigned long contention_point[LOCKSTAT_POINTS];
125 unsigned long contending_point[LOCKSTAT_POINTS];
123#endif 126#endif
124}; 127};
125 128
@@ -144,6 +147,7 @@ enum bounce_type {
144 147
145struct lock_class_stats { 148struct lock_class_stats {
146 unsigned long contention_point[4]; 149 unsigned long contention_point[4];
150 unsigned long contending_point[4];
147 struct lock_time read_waittime; 151 struct lock_time read_waittime;
148 struct lock_time write_waittime; 152 struct lock_time write_waittime;
149 struct lock_time read_holdtime; 153 struct lock_time read_holdtime;
@@ -165,6 +169,7 @@ struct lockdep_map {
165 const char *name; 169 const char *name;
166#ifdef CONFIG_LOCK_STAT 170#ifdef CONFIG_LOCK_STAT
167 int cpu; 171 int cpu;
172 unsigned long ip;
168#endif 173#endif
169}; 174};
170 175
@@ -309,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
309extern void lock_release(struct lockdep_map *lock, int nested, 314extern void lock_release(struct lockdep_map *lock, int nested,
310 unsigned long ip); 315 unsigned long ip);
311 316
312extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, 317extern void lock_set_class(struct lockdep_map *lock, const char *name,
313 unsigned long ip); 318 struct lock_class_key *key, unsigned int subclass,
319 unsigned long ip);
320
321static inline void lock_set_subclass(struct lockdep_map *lock,
322 unsigned int subclass, unsigned long ip)
323{
324 lock_set_class(lock, lock->name, lock->key, subclass, ip);
325}
314 326
315# define INIT_LOCKDEP .lockdep_recursion = 0, 327# define INIT_LOCKDEP .lockdep_recursion = 0,
316 328
@@ -328,6 +340,7 @@ static inline void lockdep_on(void)
328 340
329# define lock_acquire(l, s, t, r, c, n, i) do { } while (0) 341# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
330# define lock_release(l, n, i) do { } while (0) 342# define lock_release(l, n, i) do { } while (0)
343# define lock_set_class(l, n, k, s, i) do { } while (0)
331# define lock_set_subclass(l, s, i) do { } while (0) 344# define lock_set_subclass(l, s, i) do { } while (0)
332# define lockdep_init() do { } while (0) 345# define lockdep_init() do { } while (0)
333# define lockdep_info() do { } while (0) 346# define lockdep_info() do { } while (0)
@@ -356,7 +369,7 @@ struct lock_class_key { };
356#ifdef CONFIG_LOCK_STAT 369#ifdef CONFIG_LOCK_STAT
357 370
358extern void lock_contended(struct lockdep_map *lock, unsigned long ip); 371extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
359extern void lock_acquired(struct lockdep_map *lock); 372extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
360 373
361#define LOCK_CONTENDED(_lock, try, lock) \ 374#define LOCK_CONTENDED(_lock, try, lock) \
362do { \ 375do { \
@@ -364,13 +377,13 @@ do { \
364 lock_contended(&(_lock)->dep_map, _RET_IP_); \ 377 lock_contended(&(_lock)->dep_map, _RET_IP_); \
365 lock(_lock); \ 378 lock(_lock); \
366 } \ 379 } \
367 lock_acquired(&(_lock)->dep_map); \ 380 lock_acquired(&(_lock)->dep_map, _RET_IP_); \
368} while (0) 381} while (0)
369 382
370#else /* CONFIG_LOCK_STAT */ 383#else /* CONFIG_LOCK_STAT */
371 384
372#define lock_contended(lockdep_map, ip) do {} while (0) 385#define lock_contended(lockdep_map, ip) do {} while (0)
373#define lock_acquired(lockdep_map) do {} while (0) 386#define lock_acquired(lockdep_map, ip) do {} while (0)
374 387
375#define LOCK_CONTENDED(_lock, try, lock) \ 388#define LOCK_CONTENDED(_lock, try, lock) \
376 lock(_lock) 389 lock(_lock)
@@ -481,4 +494,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
481# define lock_map_release(l) do { } while (0) 494# define lock_map_release(l) do { } while (0)
482#endif 495#endif
483 496
497#ifdef CONFIG_PROVE_LOCKING
498# define might_lock(lock) \
499do { \
500 typecheck(struct lockdep_map *, &(lock)->dep_map); \
501 lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \
502 lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
503} while (0)
504# define might_lock_read(lock) \
505do { \
506 typecheck(struct lockdep_map *, &(lock)->dep_map); \
507 lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \
508 lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
509} while (0)
510#else
511# define might_lock(lock) do { } while (0)
512# define might_lock_read(lock) do { } while (0)
513#endif
514
484#endif /* __LINUX_LOCKDEP_H */ 515#endif /* __LINUX_LOCKDEP_H */
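
might_lock()/might_lock_read() do a fake acquire-plus-release of the lock's dep_map, teaching lockdep about an ordering that only materializes on some execution paths, so even a run that never contends still validates the dependency. A usage sketch with a hypothetical structure:

    struct thing {
            struct mutex lock;
            bool dirty;
    };

    void flush_thing(struct thing *t)
    {
            might_lock(&t->lock);   /* record the dependency on every call */

            if (t->dirty) {
                    mutex_lock(&t->lock);
                    /* ... write back ... */
                    mutex_unlock(&t->lock);
            }
    }
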
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bc6da10ceee0..7a0e5c4f8072 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
144/* 144/*
145 * NOTE: mutex_trylock() follows the spin_trylock() convention, 145 * NOTE: mutex_trylock() follows the spin_trylock() convention,
146 * not the down_trylock() convention! 146 * not the down_trylock() convention!
147 *
148 * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
147 */ 149 */
148extern int mutex_trylock(struct mutex *lock); 150extern int mutex_trylock(struct mutex *lock);
149extern void mutex_unlock(struct mutex *lock); 151extern void mutex_unlock(struct mutex *lock);
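
The added comment pins down the convention: like spin_trylock(), mutex_trylock() returns 1 on success and 0 if the lock is already held — the opposite of down_trylock(). A usage sketch:

    if (mutex_trylock(&cache->lock)) {      /* 1 == acquired */
            /* fast path */
            mutex_unlock(&cache->lock);
    } else {
            /* contended: defer the work instead of blocking */
    }
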
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62e6983..301dda829e37 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -41,7 +41,7 @@
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42 42
43#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 43#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
44#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ 44#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */
45#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ 45#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
46#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 46#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
47 47
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..bfd289aff576 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,11 +52,15 @@ struct rcu_head {
52 void (*func)(struct rcu_head *head); 52 void (*func)(struct rcu_head *head);
53}; 53};
54 54
55#ifdef CONFIG_CLASSIC_RCU 55#if defined(CONFIG_CLASSIC_RCU)
56#include <linux/rcuclassic.h> 56#include <linux/rcuclassic.h>
57#else /* #ifdef CONFIG_CLASSIC_RCU */ 57#elif defined(CONFIG_TREE_RCU)
58#include <linux/rcutree.h>
59#elif defined(CONFIG_PREEMPT_RCU)
58#include <linux/rcupreempt.h> 60#include <linux/rcupreempt.h>
59#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ 61#else
62#error "Unknown RCU implementation specified to kernel configuration"
63#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */
60 64
61#define RCU_HEAD_INIT { .next = NULL, .func = NULL } 65#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
62#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT 66#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
new file mode 100644
index 000000000000..d4368b7975c3
--- /dev/null
+++ b/include/linux/rcutree.h
@@ -0,0 +1,329 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2008
19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 *
26 * For detailed explanation of Read-Copy Update mechanism see -
27 * Documentation/RCU
28 */
29
30#ifndef __LINUX_RCUTREE_H
31#define __LINUX_RCUTREE_H
32
33#include <linux/cache.h>
34#include <linux/spinlock.h>
35#include <linux/threads.h>
36#include <linux/percpu.h>
37#include <linux/cpumask.h>
38#include <linux/seqlock.h>
39
40/*
41 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
42 * In theory, it should be possible to add more levels straightforwardly.
43 * In practice, this has not been tested, so there is probably some
44 * bug somewhere.
45 */
46#define MAX_RCU_LVLS 3
47#define RCU_FANOUT (CONFIG_RCU_FANOUT)
48#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
49#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
50
51#if NR_CPUS <= RCU_FANOUT
52# define NUM_RCU_LVLS 1
53# define NUM_RCU_LVL_0 1
54# define NUM_RCU_LVL_1 (NR_CPUS)
55# define NUM_RCU_LVL_2 0
56# define NUM_RCU_LVL_3 0
57#elif NR_CPUS <= RCU_FANOUT_SQ
58# define NUM_RCU_LVLS 2
59# define NUM_RCU_LVL_0 1
60# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT)
61# define NUM_RCU_LVL_2 (NR_CPUS)
62# define NUM_RCU_LVL_3 0
63#elif NR_CPUS <= RCU_FANOUT_CUBE
64# define NUM_RCU_LVLS 3
65# define NUM_RCU_LVL_0 1
66# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ)
67# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT))
68# define NUM_RCU_LVL_3 NR_CPUS
69#else
70# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
71#endif /* #if (NR_CPUS) <= RCU_FANOUT */
72
73#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
74#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
75
76/*
77 * Dynticks per-CPU state.
78 */
79struct rcu_dynticks {
80 int dynticks_nesting; /* Track nesting level, sort of. */
81 int dynticks; /* Even value for dynticks-idle, else odd. */
82 int dynticks_nmi; /* Even value for either dynticks-idle or */
83 /* not in nmi handler, else odd. So this */
84 /* remains even for nmi from irq handler. */
85};
86
87/*
88 * Definition for node within the RCU grace-period-detection hierarchy.
89 */
90struct rcu_node {
91 spinlock_t lock;
92 unsigned long qsmask; /* CPUs or groups that need to switch in */
93 /* order for current grace period to proceed.*/
94 unsigned long qsmaskinit;
95 /* Per-GP initialization for qsmask. */
96 unsigned long grpmask; /* Mask to apply to parent qsmask. */
97 int grplo; /* lowest-numbered CPU or group here. */
98 int grphi; /* highest-numbered CPU or group here. */
99 u8 grpnum; /* CPU/group number for next level up. */
100 u8 level; /* root is at level 0. */
101 struct rcu_node *parent;
102} ____cacheline_internodealigned_in_smp;
103
104/* Index values for nxttail array in struct rcu_data. */
105#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
106#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
107#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
108#define RCU_NEXT_TAIL 3
109#define RCU_NEXT_SIZE 4
110
111/* Per-CPU data for read-copy update. */
112struct rcu_data {
113 /* 1) quiescent-state and grace-period handling : */
114 long completed; /* Track rsp->completed gp number */
115 /* in order to detect GP end. */
116 long gpnum; /* Highest gp number that this CPU */
117 /* is aware of having started. */
118 long passed_quiesc_completed;
119 /* Value of completed at time of qs. */
120 bool passed_quiesc; /* User-mode/idle loop etc. */
121 bool qs_pending; /* Core waits for quiesc state. */
122 bool beenonline; /* CPU online at least once. */
123 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
124 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
125
126 /* 2) batch handling */
127 /*
128 * If nxtlist is not NULL, it is partitioned as follows.
129 * Any of the partitions might be empty, in which case the
130 * pointer to that partition will be equal to the pointer for
131 * the following partition. When the list is empty, all of
132 * the nxttail elements point to nxtlist, which is NULL.
133 *
134 * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
135 * Entries that might have arrived after current GP ended
136 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
137 * Entries known to have arrived before current GP ended
138 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
139 * Entries that batch # <= ->completed - 1: waiting for current GP
140 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
141 * Entries that batch # <= ->completed
142 * The grace period for these entries has completed, and
143 * the other grace-period-completed entries may be moved
144 * here temporarily in rcu_process_callbacks().
145 */
146 struct rcu_head *nxtlist;
147 struct rcu_head **nxttail[RCU_NEXT_SIZE];
148 long qlen; /* # of queued callbacks */
149 long blimit; /* Upper limit on a processed batch */
150
151#ifdef CONFIG_NO_HZ
152 /* 3) dynticks interface. */
153 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
154 int dynticks_snap; /* Per-GP tracking for dynticks. */
155 int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */
156#endif /* #ifdef CONFIG_NO_HZ */
157
158 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
159#ifdef CONFIG_NO_HZ
160 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
161#endif /* #ifdef CONFIG_NO_HZ */
162 unsigned long offline_fqs; /* Kicked due to being offline. */
163 unsigned long resched_ipi; /* Sent a resched IPI. */
164
165 /* 5) state to allow this CPU to force_quiescent_state on others */
166 long n_rcu_pending; /* rcu_pending() calls since boot. */
167 long n_rcu_pending_force_qs; /* when to force quiescent states. */
168
169 int cpu;
170};
171
172/* Values for signaled field in struct rcu_state. */
173#define RCU_GP_INIT 0 /* Grace period being initialized. */
174#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */
175#define RCU_FORCE_QS 2 /* Need to force quiescent state. */
176#ifdef CONFIG_NO_HZ
177#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
178#else /* #ifdef CONFIG_NO_HZ */
179#define RCU_SIGNAL_INIT RCU_FORCE_QS
180#endif /* #else #ifdef CONFIG_NO_HZ */
181
182#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
183#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
184#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */
185#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */
186#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
187 /* to take at least one */
188 /* scheduling clock irq */
189 /* before ratting on them. */
190
191#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
192
193/*
194 * RCU global state, including node hierarchy. This hierarchy is
195 * represented in "heap" form in a dense array. The root (first level)
196 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
197 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
198 * and the third level in ->node[m+1] and following (->node[m+1] referenced
199 * by ->level[2]). The number of levels is determined by the number of
200 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
201 * consisting of a single rcu_node.
202 */
203struct rcu_state {
204 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
205 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
206 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
207 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
208 struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */
209
210 /* The following fields are guarded by the root rcu_node's lock. */
211
212 u8 signaled ____cacheline_internodealigned_in_smp;
213 /* Force QS state. */
214 long gpnum; /* Current gp number. */
215 long completed; /* # of last completed gp. */
216 spinlock_t onofflock; /* exclude on/offline and */
217 /* starting new GP. */
218 spinlock_t fqslock; /* Only one task forcing */
219 /* quiescent states. */
220 unsigned long jiffies_force_qs; /* Time at which to invoke */
221 /* force_quiescent_state(). */
222 unsigned long n_force_qs; /* Number of calls to */
223 /* force_quiescent_state(). */
224 unsigned long n_force_qs_lh; /* ~Number of calls leaving */
225 /* due to lock unavailable. */
226 unsigned long n_force_qs_ngp; /* Number of calls leaving */
227 /* due to no GP active. */
228#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
229 unsigned long gp_start; /* Time at which GP started, */
230 /* but in jiffies. */
231 unsigned long jiffies_stall; /* Time at which to check */
232 /* for CPU stalls. */
233#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
234#ifdef CONFIG_NO_HZ
235 long dynticks_completed; /* Value of completed @ snap. */
236#endif /* #ifdef CONFIG_NO_HZ */
237};
238
239extern struct rcu_state rcu_state;
240DECLARE_PER_CPU(struct rcu_data, rcu_data);
241
242extern struct rcu_state rcu_bh_state;
243DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
244
245/*
246 * Increment the quiescent state counter.
247 * The counter is a bit degenerated: We do not need to know
248 * how many quiescent states passed, just if there was at least
249 * one since the start of the grace period. Thus just a flag.
250 */
251static inline void rcu_qsctr_inc(int cpu)
252{
253 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
254 rdp->passed_quiesc = 1;
255 rdp->passed_quiesc_completed = rdp->completed;
256}
257static inline void rcu_bh_qsctr_inc(int cpu)
258{
259 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
260 rdp->passed_quiesc = 1;
261 rdp->passed_quiesc_completed = rdp->completed;
262}
263
264extern int rcu_pending(int cpu);
265extern int rcu_needs_cpu(int cpu);
266
267#ifdef CONFIG_DEBUG_LOCK_ALLOC
268extern struct lockdep_map rcu_lock_map;
269# define rcu_read_acquire() \
270 lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
271# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
272#else
273# define rcu_read_acquire() do { } while (0)
274# define rcu_read_release() do { } while (0)
275#endif
276
277static inline void __rcu_read_lock(void)
278{
279 preempt_disable();
280 __acquire(RCU);
281 rcu_read_acquire();
282}
283static inline void __rcu_read_unlock(void)
284{
285 rcu_read_release();
286 __release(RCU);
287 preempt_enable();
288}
289static inline void __rcu_read_lock_bh(void)
290{
291 local_bh_disable();
292 __acquire(RCU_BH);
293 rcu_read_acquire();
294}
295static inline void __rcu_read_unlock_bh(void)
296{
297 rcu_read_release();
298 __release(RCU_BH);
299 local_bh_enable();
300}
301
302#define __synchronize_sched() synchronize_rcu()
303
304#define call_rcu_sched(head, func) call_rcu(head, func)
305
306static inline void rcu_init_sched(void)
307{
308}
309
310extern void __rcu_init(void);
311extern void rcu_check_callbacks(int cpu, int user);
312extern void rcu_restart_cpu(int cpu);
313
314extern long rcu_batches_completed(void);
315extern long rcu_batches_completed_bh(void);
316
317#ifdef CONFIG_NO_HZ
318void rcu_enter_nohz(void);
319void rcu_exit_nohz(void);
320#else /* CONFIG_NO_HZ */
321static inline void rcu_enter_nohz(void)
322{
323}
324static inline void rcu_exit_nohz(void)
325{
326}
327#endif /* CONFIG_NO_HZ */
328
329#endif /* __LINUX_RCUTREE_H */
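
The geometry macros size the rcu_node tree at compile time from NR_CPUS and CONFIG_RCU_FANOUT. A worked example for RCU_FANOUT=64 and NR_CPUS=4096 (the 64-bit default fanout at its two-level maximum):

    /*
     * 4096 <= RCU_FANOUT_SQ (64 * 64), so NUM_RCU_LVLS = 2:
     *   NUM_RCU_LVL_0 = 1                        (the root rcu_node)
     *   NUM_RCU_LVL_1 = (4096 + 63) / 64 = 64    (leaf rcu_nodes)
     *   NUM_RCU_LVL_2 = 4096                     (the CPUs themselves)
     *   RCU_SUM       = 1 + 64 + 4096 = 4161
     *   NUM_RCU_NODES = RCU_SUM - NR_CPUS = 65   (1 root + 64 leaves)
     * Each leaf covers 64 CPUs, and each CPU's rcu_data.mynode points
     * at its leaf.
     */
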
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b18ec5533e8c..325af1de0351 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,9 +7,31 @@ struct device;
7struct dma_attrs; 7struct dma_attrs;
8struct scatterlist; 8struct scatterlist;
9 9
10/*
11 * Maximum allowable number of contiguous slabs to map,
12 * must be a power of 2. What is the appropriate value?
13 * The complexity of {map,unmap}_single is linearly dependent on this value.
14 */
15#define IO_TLB_SEGSIZE 128
16
17
18/*
19 * log of the size of each IO TLB slab. The number of slabs is command line
20 * controllable.
21 */
22#define IO_TLB_SHIFT 11
23
10extern void 24extern void
11swiotlb_init(void); 25swiotlb_init(void);
12 26
27extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
28extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
29
30extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
31extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
32
33extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
34
13extern void 35extern void
14*swiotlb_alloc_coherent(struct device *hwdev, size_t size, 36*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
15 dma_addr_t *dma_handle, gfp_t flags); 37 dma_addr_t *dma_handle, gfp_t flags);
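
With the constants now exported here, the bounce-buffer geometry follows directly: each IO-TLB slab is 1 << IO_TLB_SHIFT = 2048 bytes, and one mapping may span at most IO_TLB_SEGSIZE contiguous slabs:

    size_t slab_bytes  = 1UL << IO_TLB_SHIFT;               /* 2048 */
    size_t max_mapping = IO_TLB_SEGSIZE << IO_TLB_SHIFT;    /* 262144 (256 KiB) */
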
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index fec6decfb983..6b58367d145e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
78 \ 78 \
79 set_fs(KERNEL_DS); \ 79 set_fs(KERNEL_DS); \
80 pagefault_disable(); \ 80 pagefault_disable(); \
81 ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ 81 ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
82 pagefault_enable(); \ 82 pagefault_enable(); \
83 set_fs(old_fs); \ 83 set_fs(old_fs); \
84 ret; \ 84 ret; \
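
Replacing __get_user() with __copy_from_user_inatomic() in the probing macro patched above lifts the size restriction on the probed object: __get_user() handles only 1-, 2-, 4- and 8-byte scalars, while the copy variant moves sizeof(retval) bytes of any type. A sketch of probing an arbitrary structure (hypothetical type and helper):

    struct foo snapshot;
    long err;

    err = probe_kernel_address(ptr, snapshot);  /* copies sizeof(snapshot) */
    if (!err)
            inspect(&snapshot);                 /* hypothetical */
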
diff --git a/init/Kconfig b/init/Kconfig
index f763762d544a..6b0fdedf3596 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -928,10 +928,90 @@ source "block/Kconfig"
928config PREEMPT_NOTIFIERS 928config PREEMPT_NOTIFIERS
929 bool 929 bool
930 930
931choice
932 prompt "RCU Implementation"
933 default CLASSIC_RCU
934
931config CLASSIC_RCU 935config CLASSIC_RCU
932 def_bool !PREEMPT_RCU 936 bool "Classic RCU"
933 help 937 help
934 This option selects the classic RCU implementation that is 938 This option selects the classic RCU implementation that is
935 designed for best read-side performance on non-realtime 939 designed for best read-side performance on non-realtime
936 systems. Classic RCU is the default. Note that the 940 systems.
937 PREEMPT_RCU symbol is used to select/deselect this option. 941
942 Select this option if you are unsure.
943
944config TREE_RCU
945 bool "Tree-based hierarchical RCU"
946 help
947 This option selects the RCU implementation that is
948 designed for very large SMP system with hundreds or
949 thousands of CPUs.
950
951config PREEMPT_RCU
952 bool "Preemptible RCU"
953 depends on PREEMPT
954 help
955 This option reduces the latency of the kernel by making certain
956 RCU sections preemptible. Normally RCU code is non-preemptible, if
957 this option is selected then read-only RCU sections become
958 preemptible. This helps latency, but may expose bugs due to
959 now-naive assumptions about each RCU read-side critical section
960 remaining on a given CPU through its execution.
961
962endchoice
963
964config RCU_TRACE
965 bool "Enable tracing for RCU"
966 depends on TREE_RCU || PREEMPT_RCU
967 help
968 This option provides tracing in RCU, which presents stats
969 in debugfs for debugging the RCU implementation.
970
971 Say Y here if you want to enable RCU tracing.
972 Say N if you are unsure.
973
974config RCU_FANOUT
975 int "Tree-based hierarchical RCU fanout value"
976 range 2 64 if 64BIT
977 range 2 32 if !64BIT
978 depends on TREE_RCU
979 default 64 if 64BIT
980 default 32 if !64BIT
981 help
982 This option controls the fanout of hierarchical implementations
983 of RCU, allowing RCU to work efficiently on machines with
984 large numbers of CPUs. This value must be at least the cube
985 root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
986 systems and up to 262,144 for 64-bit systems.
987
988 Select a specific number if testing RCU itself.
989 Take the default if unsure.
990
991config RCU_FANOUT_EXACT
992 bool "Disable tree-based hierarchical RCU auto-balancing"
993 depends on TREE_RCU
994 default n
995 help
996 This option forces use of the exact RCU_FANOUT value specified,
997 regardless of imbalances in the hierarchy. This is useful for
998 testing RCU itself, and might one day be useful on systems with
999 strong NUMA behavior.
1000
1001 Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
1002
1003 Say N if unsure.
1004
1005config TREE_RCU_TRACE
1006 def_bool RCU_TRACE && TREE_RCU
1007 select DEBUG_FS
1008 help
1009 This option provides tracing for the TREE_RCU implementation,
1010 permitting the Makefile to trivially select kernel/rcutree_trace.c.
1011
1012config PREEMPT_RCU_TRACE
1013 def_bool RCU_TRACE && PREEMPT_RCU
1014 select DEBUG_FS
1015 help
1016 This option provides tracing for the PREEMPT_RCU implementation,
1017 permitting the Makefile to trivially select kernel/rcupreempt_trace.c.
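
The cube-root constraint in the RCU_FANOUT help text comes from MAX_RCU_LVLS = 3: with fanout f and at most three levels, the tree covers at most f^3 CPUs, so

    32^3 = 32,768  CPUs  (maximum with the 32-bit default fanout of 32)
    64^3 = 262,144 CPUs  (maximum with the 64-bit default fanout of 64)

matching the limits quoted above.
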
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 9fdba03dc1fc..bf987b95b356 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,28 +52,3 @@ config PREEMPT
52 52
53endchoice 53endchoice
54 54
55config PREEMPT_RCU
56 bool "Preemptible RCU"
57 depends on PREEMPT
58 default n
59 help
60 This option reduces the latency of the kernel by making certain
61 RCU sections preemptible. Normally RCU code is non-preemptible, if
62 this option is selected then read-only RCU sections become
63 preemptible. This helps latency, but may expose bugs due to
64 now-naive assumptions about each RCU read-side critical section
65 remaining on a given CPU through its execution.
66
67 Say N if you are unsure.
68
69config RCU_TRACE
70 bool "Enable tracing for RCU - currently stats in debugfs"
71 depends on PREEMPT_RCU
72 select DEBUG_FS
73 default y
74 help
75 This option provides tracing in RCU which presents stats
76 in debugfs for debugging RCU implementation.
77
78 Say Y here if you want to enable RCU tracing
79 Say N if you are unsure.
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..b4fdbbff5ec0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
74obj-$(CONFIG_SECCOMP) += seccomp.o 74obj-$(CONFIG_SECCOMP) += seccomp.o
75obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 75obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
76obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o 76obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
77obj-$(CONFIG_TREE_RCU) += rcutree.o
77obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o 78obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
78ifeq ($(CONFIG_PREEMPT_RCU),y) 79obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
79obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o 80obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
80endif
81obj-$(CONFIG_RELAY) += relay.o 81obj-$(CONFIG_RELAY) += relay.o
82obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 82obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
83obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 83obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8185a0f09594..2606d0fb4e54 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1024,7 +1024,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1024 if (ret == -EBUSY) { 1024 if (ret == -EBUSY) {
1025 mutex_unlock(&cgroup_mutex); 1025 mutex_unlock(&cgroup_mutex);
1026 mutex_unlock(&inode->i_mutex); 1026 mutex_unlock(&inode->i_mutex);
1027 goto drop_new_super; 1027 goto free_cg_links;
1028 } 1028 }
1029 1029
1030 /* EBUSY should be the only error here */ 1030 /* EBUSY should be the only error here */
@@ -1073,10 +1073,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1073 1073
1074 return simple_set_mnt(mnt, sb); 1074 return simple_set_mnt(mnt, sb);
1075 1075
1076 free_cg_links:
1077 free_cg_links(&tmp_cg_links);
1076 drop_new_super: 1078 drop_new_super:
1077 up_write(&sb->s_umount); 1079 up_write(&sb->s_umount);
1078 deactivate_super(sb); 1080 deactivate_super(sb);
1079 free_cg_links(&tmp_cg_links);
1080 return ret; 1081 return ret;
1081} 1082}
1082 1083
@@ -2934,9 +2935,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2934 again: 2935 again:
2935 root = subsys->root; 2936 root = subsys->root;
2936 if (root == &rootnode) { 2937 if (root == &rootnode) {
2937 printk(KERN_INFO
2938 "Not cloning cgroup for unused subsystem %s\n",
2939 subsys->name);
2940 mutex_unlock(&cgroup_mutex); 2938 mutex_unlock(&cgroup_mutex);
2941 return 0; 2939 return 0;
2942 } 2940 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f7..30fcdf16737a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1321,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
1321 * group, which consolidates times for all threads in the 1321 * group, which consolidates times for all threads in the
1322 * group including the group leader. 1322 * group including the group leader.
1323 */ 1323 */
1324 thread_group_cputime(p, &cputime);
1324 spin_lock_irq(&p->parent->sighand->siglock); 1325 spin_lock_irq(&p->parent->sighand->siglock);
1325 psig = p->parent->signal; 1326 psig = p->parent->signal;
1326 sig = p->signal; 1327 sig = p->signal;
1327 thread_group_cputime(p, &cputime);
1328 psig->cutime = 1328 psig->cutime =
1329 cputime_add(psig->cutime, 1329 cputime_add(psig->cutime,
1330 cputime_add(cputime.utime, 1330 cputime_add(cputime.utime,
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
66 return 1; 66 return 1;
67 return module_text_address(addr) != NULL; 67 return module_text_address(addr) != NULL;
68} 68}
69
70/*
71 * On some architectures (PPC64, IA64) function pointers
72 * are actually only tokens to some data that then holds the
73 * real function address. As a result, to find if a function
74 * pointer is part of the kernel text, we need to do some
75 * special dereferencing first.
76 */
77int func_ptr_is_kernel_text(void *ptr)
78{
79 unsigned long addr;
80 addr = (unsigned long) dereference_function_descriptor(ptr);
81 if (core_kernel_text(addr))
82 return 1;
83 return module_text_address(addr) != NULL;
84}
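
func_ptr_is_kernel_text() exists because on PPC64 and IA64 a C function pointer addresses a descriptor rather than the code itself; dereference_function_descriptor() resolves that before the text-range check. A sketch of a caller validating a callback before registering it (hypothetical API):

    int register_callback(int (*fn)(void *))
    {
            if (!func_ptr_is_kernel_text(fn))
                    return -EINVAL; /* bogus or module-unloaded pointer */
            /* ... link fn into the callback list ... */
            return 0;
    }
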
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514b..b4f87bac91c1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -92,11 +92,12 @@ struct futex_pi_state {
92 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 92 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
93 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 93 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
94 * The order of wakeup is always to make the first condition true, then 94 * The order of wakeup is always to make the first condition true, then
95 * wake up q->waiters, then make the second condition true. 95 * wake up q->waiter, then make the second condition true.
96 */ 96 */
97struct futex_q { 97struct futex_q {
98 struct plist_node list; 98 struct plist_node list;
99 wait_queue_head_t waiters; 99 /* There can only be a single waiter */
100 wait_queue_head_t waiter;
100 101
101 /* Which hash list lock to use: */ 102 /* Which hash list lock to use: */
102 spinlock_t *lock_ptr; 103 spinlock_t *lock_ptr;
@@ -123,24 +124,6 @@ struct futex_hash_bucket {
123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 124static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
124 125
125/* 126/*
126 * Take mm->mmap_sem, when futex is shared
127 */
128static inline void futex_lock_mm(struct rw_semaphore *fshared)
129{
130 if (fshared)
131 down_read(fshared);
132}
133
134/*
135 * Release mm->mmap_sem, when the futex is shared
136 */
137static inline void futex_unlock_mm(struct rw_semaphore *fshared)
138{
139 if (fshared)
140 up_read(fshared);
141}
142
143/*
144 * We hash on the keys returned from get_futex_key (see below). 127 * We hash on the keys returned from get_futex_key (see below).
145 */ 128 */
146static struct futex_hash_bucket *hash_futex(union futex_key *key) 129static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +144,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
161 && key1->both.offset == key2->both.offset); 144 && key1->both.offset == key2->both.offset);
162} 145}
163 146
147/*
148 * Take a reference to the resource addressed by a key.
149 * Can be called while holding spinlocks.
150 *
151 */
152static void get_futex_key_refs(union futex_key *key)
153{
154 if (!key->both.ptr)
155 return;
156
157 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
158 case FUT_OFF_INODE:
159 atomic_inc(&key->shared.inode->i_count);
160 break;
161 case FUT_OFF_MMSHARED:
162 atomic_inc(&key->private.mm->mm_count);
163 break;
164 }
165}
166
167/*
168 * Drop a reference to the resource addressed by a key.
169 * The hash bucket spinlock must not be held.
170 */
171static void drop_futex_key_refs(union futex_key *key)
172{
173 if (!key->both.ptr)
174 return;
175
176 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
177 case FUT_OFF_INODE:
178 iput(key->shared.inode);
179 break;
180 case FUT_OFF_MMSHARED:
181 mmdrop(key->private.mm);
182 break;
183 }
184}
185
164/** 186/**
165 * get_futex_key - Get parameters which are the keys for a futex. 187 * get_futex_key - Get parameters which are the keys for a futex.
166 * @uaddr: virtual address of the futex 188 * @uaddr: virtual address of the futex
@@ -179,12 +201,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
179 * For other futexes, it points to &current->mm->mmap_sem and 201 * For other futexes, it points to &current->mm->mmap_sem and
180 * caller must have taken the reader lock. but NOT any spinlocks. 202 * caller must have taken the reader lock. but NOT any spinlocks.
181 */ 203 */
182static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, 204static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
183 union futex_key *key)
184{ 205{
185 unsigned long address = (unsigned long)uaddr; 206 unsigned long address = (unsigned long)uaddr;
186 struct mm_struct *mm = current->mm; 207 struct mm_struct *mm = current->mm;
187 struct vm_area_struct *vma;
188 struct page *page; 208 struct page *page;
189 int err; 209 int err;
190 210
@@ -208,100 +228,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
208 return -EFAULT; 228 return -EFAULT;
209 key->private.mm = mm; 229 key->private.mm = mm;
210 key->private.address = address; 230 key->private.address = address;
231 get_futex_key_refs(key);
211 return 0; 232 return 0;
212 } 233 }
213 /*
214 * The futex is hashed differently depending on whether
215 * it's in a shared or private mapping. So check vma first.
216 */
217 vma = find_extend_vma(mm, address);
218 if (unlikely(!vma))
219 return -EFAULT;
220 234
221 /* 235again:
222 * Permissions. 236 err = get_user_pages_fast(address, 1, 0, &page);
223 */ 237 if (err < 0)
224 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) 238 return err;
225 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; 239
240 lock_page(page);
241 if (!page->mapping) {
242 unlock_page(page);
243 put_page(page);
244 goto again;
245 }
226 246
227 /* 247 /*
228 * Private mappings are handled in a simple way. 248 * Private mappings are handled in a simple way.
229 * 249 *
230 * NOTE: When userspace waits on a MAP_SHARED mapping, even if 250 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
231 * it's a read-only handle, it's expected that futexes attach to 251 * it's a read-only handle, it's expected that futexes attach to
232 * the object not the particular process. Therefore we use 252 * the object not the particular process.
233 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
234 * mappings of _writable_ handles.
235 */ 253 */
236 if (likely(!(vma->vm_flags & VM_MAYSHARE))) { 254 if (PageAnon(page)) {
237 key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ 255 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
238 key->private.mm = mm; 256 key->private.mm = mm;
239 key->private.address = address; 257 key->private.address = address;
240 return 0; 258 } else {
259 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
260 key->shared.inode = page->mapping->host;
261 key->shared.pgoff = page->index;
241 } 262 }
242 263
243 /* 264 get_futex_key_refs(key);
244 * Linear file mappings are also simple.
245 */
246 key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
247 key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
248 if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
249 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
250 + vma->vm_pgoff);
251 return 0;
252 }
253 265
254 /* 266 unlock_page(page);
255 * We could walk the page table to read the non-linear 267 put_page(page);
256 * pte, and get the page index without fetching the page 268 return 0;
257 * from swap. But that's a lot of code to duplicate here
258 * for a rare case, so we simply fetch the page.
259 */
260 err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
261 if (err >= 0) {
262 key->shared.pgoff =
263 page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
264 put_page(page);
265 return 0;
266 }
267 return err;
268}
269
270/*
271 * Take a reference to the resource addressed by a key.
272 * Can be called while holding spinlocks.
273 *
274 */
275static void get_futex_key_refs(union futex_key *key)
276{
277 if (key->both.ptr == NULL)
278 return;
279 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
280 case FUT_OFF_INODE:
281 atomic_inc(&key->shared.inode->i_count);
282 break;
283 case FUT_OFF_MMSHARED:
284 atomic_inc(&key->private.mm->mm_count);
285 break;
286 }
287} 269}
288 270
289/* 271static inline
290 * Drop a reference to the resource addressed by a key. 272void put_futex_key(int fshared, union futex_key *key)
291 * The hash bucket spinlock must not be held.
292 */
293static void drop_futex_key_refs(union futex_key *key)
294{ 273{
295 if (!key->both.ptr) 274 drop_futex_key_refs(key);
296 return;
297 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
298 case FUT_OFF_INODE:
299 iput(key->shared.inode);
300 break;
301 case FUT_OFF_MMSHARED:
302 mmdrop(key->private.mm);
303 break;
304 }
305} 275}
306 276
307static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 277static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -328,10 +298,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
328 298
329/* 299/*
330 * Fault handling. 300 * Fault handling.
331 * if fshared is non NULL, current->mm->mmap_sem is already held
332 */ 301 */
333static int futex_handle_fault(unsigned long address, 302static int futex_handle_fault(unsigned long address, int attempt)
334 struct rw_semaphore *fshared, int attempt)
335{ 303{
336 struct vm_area_struct * vma; 304 struct vm_area_struct * vma;
337 struct mm_struct *mm = current->mm; 305 struct mm_struct *mm = current->mm;
@@ -340,8 +308,7 @@ static int futex_handle_fault(unsigned long address,
340 if (attempt > 2) 308 if (attempt > 2)
341 return ret; 309 return ret;
342 310
343 if (!fshared) 311 down_read(&mm->mmap_sem);
344 down_read(&mm->mmap_sem);
345 vma = find_vma(mm, address); 312 vma = find_vma(mm, address);
346 if (vma && address >= vma->vm_start && 313 if (vma && address >= vma->vm_start &&
347 (vma->vm_flags & VM_WRITE)) { 314 (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +328,7 @@ static int futex_handle_fault(unsigned long address,
361 current->min_flt++; 328 current->min_flt++;
362 } 329 }
363 } 330 }
364 if (!fshared) 331 up_read(&mm->mmap_sem);
365 up_read(&mm->mmap_sem);
366 return ret; 332 return ret;
367} 333}
368 334
@@ -385,6 +351,7 @@ static int refill_pi_state_cache(void)
385 /* pi_mutex gets initialized later */ 351 /* pi_mutex gets initialized later */
386 pi_state->owner = NULL; 352 pi_state->owner = NULL;
387 atomic_set(&pi_state->refcount, 1); 353 atomic_set(&pi_state->refcount, 1);
354 pi_state->key = FUTEX_KEY_INIT;
388 355
389 current->pi_state_cache = pi_state; 356 current->pi_state_cache = pi_state;
390 357
@@ -462,7 +429,7 @@ void exit_pi_state_list(struct task_struct *curr)
462 struct list_head *next, *head = &curr->pi_state_list; 429 struct list_head *next, *head = &curr->pi_state_list;
463 struct futex_pi_state *pi_state; 430 struct futex_pi_state *pi_state;
464 struct futex_hash_bucket *hb; 431 struct futex_hash_bucket *hb;
465 union futex_key key; 432 union futex_key key = FUTEX_KEY_INIT;
466 433
467 if (!futex_cmpxchg_enabled) 434 if (!futex_cmpxchg_enabled)
468 return; 435 return;
@@ -607,7 +574,7 @@ static void wake_futex(struct futex_q *q)
607 * The lock in wake_up_all() is a crucial memory barrier after the 574 * The lock in wake_up_all() is a crucial memory barrier after the
608 * plist_del() and also before assigning to q->lock_ptr. 575 * plist_del() and also before assigning to q->lock_ptr.
609 */ 576 */
610 wake_up_all(&q->waiters); 577 wake_up(&q->waiter);
611 /* 578 /*
612 * The waiting task can free the futex_q as soon as this is written, 579 * The waiting task can free the futex_q as soon as this is written,
613 * without taking any locks. This must come last. 580 * without taking any locks. This must come last.
@@ -719,20 +686,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
719 * Wake up all waiters hashed on the physical page that is mapped 686 * Wake up all waiters hashed on the physical page that is mapped
720 * to this virtual address: 687 * to this virtual address:
721 */ 688 */
722static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 689static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
723 int nr_wake, u32 bitset)
724{ 690{
725 struct futex_hash_bucket *hb; 691 struct futex_hash_bucket *hb;
726 struct futex_q *this, *next; 692 struct futex_q *this, *next;
727 struct plist_head *head; 693 struct plist_head *head;
728 union futex_key key; 694 union futex_key key = FUTEX_KEY_INIT;
729 int ret; 695 int ret;
730 696
731 if (!bitset) 697 if (!bitset)
732 return -EINVAL; 698 return -EINVAL;
733 699
734 futex_lock_mm(fshared);
735
736 ret = get_futex_key(uaddr, fshared, &key); 700 ret = get_futex_key(uaddr, fshared, &key);
737 if (unlikely(ret != 0)) 701 if (unlikely(ret != 0))
738 goto out; 702 goto out;
@@ -760,7 +724,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
760 724
761 spin_unlock(&hb->lock); 725 spin_unlock(&hb->lock);
762out: 726out:
763 futex_unlock_mm(fshared); 727 put_futex_key(fshared, &key);
764 return ret; 728 return ret;
765} 729}
766 730
@@ -769,19 +733,16 @@ out:
769 * to this virtual address: 733 * to this virtual address:
770 */ 734 */
771static int 735static int
772futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, 736futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
773 u32 __user *uaddr2,
774 int nr_wake, int nr_wake2, int op) 737 int nr_wake, int nr_wake2, int op)
775{ 738{
776 union futex_key key1, key2; 739 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
777 struct futex_hash_bucket *hb1, *hb2; 740 struct futex_hash_bucket *hb1, *hb2;
778 struct plist_head *head; 741 struct plist_head *head;
779 struct futex_q *this, *next; 742 struct futex_q *this, *next;
780 int ret, op_ret, attempt = 0; 743 int ret, op_ret, attempt = 0;
781 744
782retryfull: 745retryfull:
783 futex_lock_mm(fshared);
784
785 ret = get_futex_key(uaddr1, fshared, &key1); 746 ret = get_futex_key(uaddr1, fshared, &key1);
786 if (unlikely(ret != 0)) 747 if (unlikely(ret != 0))
787 goto out; 748 goto out;
@@ -826,18 +787,12 @@ retry:
826 */ 787 */
827 if (attempt++) { 788 if (attempt++) {
828 ret = futex_handle_fault((unsigned long)uaddr2, 789 ret = futex_handle_fault((unsigned long)uaddr2,
829 fshared, attempt); 790 attempt);
830 if (ret) 791 if (ret)
831 goto out; 792 goto out;
832 goto retry; 793 goto retry;
833 } 794 }
834 795
835 /*
836 * If we would have faulted, release mmap_sem,
837 * fault it in and start all over again.
838 */
839 futex_unlock_mm(fshared);
840
841 ret = get_user(dummy, uaddr2); 796 ret = get_user(dummy, uaddr2);
842 if (ret) 797 if (ret)
843 return ret; 798 return ret;
@@ -873,7 +828,8 @@ retry:
873 if (hb1 != hb2) 828 if (hb1 != hb2)
874 spin_unlock(&hb2->lock); 829 spin_unlock(&hb2->lock);
875out: 830out:
876 futex_unlock_mm(fshared); 831 put_futex_key(fshared, &key2);
832 put_futex_key(fshared, &key1);
877 833
878 return ret; 834 return ret;
879} 835}
@@ -882,19 +838,16 @@ out:
882 * Requeue all waiters hashed on one physical page to another 838 * Requeue all waiters hashed on one physical page to another
883 * physical page. 839 * physical page.
884 */ 840 */
885static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, 841static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
886 u32 __user *uaddr2,
887 int nr_wake, int nr_requeue, u32 *cmpval) 842 int nr_wake, int nr_requeue, u32 *cmpval)
888{ 843{
889 union futex_key key1, key2; 844 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
890 struct futex_hash_bucket *hb1, *hb2; 845 struct futex_hash_bucket *hb1, *hb2;
891 struct plist_head *head1; 846 struct plist_head *head1;
892 struct futex_q *this, *next; 847 struct futex_q *this, *next;
893 int ret, drop_count = 0; 848 int ret, drop_count = 0;
894 849
895 retry: 850 retry:
896 futex_lock_mm(fshared);
897
898 ret = get_futex_key(uaddr1, fshared, &key1); 851 ret = get_futex_key(uaddr1, fshared, &key1);
899 if (unlikely(ret != 0)) 852 if (unlikely(ret != 0))
900 goto out; 853 goto out;
@@ -917,12 +870,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
917 if (hb1 != hb2) 870 if (hb1 != hb2)
918 spin_unlock(&hb2->lock); 871 spin_unlock(&hb2->lock);
919 872
920 /*
921 * If we would have faulted, release mmap_sem, fault
922 * it in and start all over again.
923 */
924 futex_unlock_mm(fshared);
925
926 ret = get_user(curval, uaddr1); 873 ret = get_user(curval, uaddr1);
927 874
928 if (!ret) 875 if (!ret)
@@ -974,7 +921,8 @@ out_unlock:
974 drop_futex_key_refs(&key1); 921 drop_futex_key_refs(&key1);
975 922
976out: 923out:
977 futex_unlock_mm(fshared); 924 put_futex_key(fshared, &key2);
925 put_futex_key(fshared, &key1);
978 return ret; 926 return ret;
979} 927}
980 928
@@ -983,7 +931,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
983{ 931{
984 struct futex_hash_bucket *hb; 932 struct futex_hash_bucket *hb;
985 933
986 init_waitqueue_head(&q->waiters); 934 init_waitqueue_head(&q->waiter);
987 935
988 get_futex_key_refs(&q->key); 936 get_futex_key_refs(&q->key);
989 hb = hash_futex(&q->key); 937 hb = hash_futex(&q->key);
@@ -1096,8 +1044,7 @@ static void unqueue_me_pi(struct futex_q *q)
1096 * private futexes. 1044 * private futexes.
1097 */ 1045 */
1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1046static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099 struct task_struct *newowner, 1047 struct task_struct *newowner, int fshared)
1100 struct rw_semaphore *fshared)
1101{ 1048{
1102 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1049 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1103 struct futex_pi_state *pi_state = q->pi_state; 1050 struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1123,7 @@ retry:
1176handle_fault: 1123handle_fault:
1177 spin_unlock(q->lock_ptr); 1124 spin_unlock(q->lock_ptr);
1178 1125
1179 ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); 1126 ret = futex_handle_fault((unsigned long)uaddr, attempt++);
1180 1127
1181 spin_lock(q->lock_ptr); 1128 spin_lock(q->lock_ptr);
1182 1129
@@ -1196,12 +1143,13 @@ handle_fault:
1196 * In case we must use restart_block to restart a futex_wait, 1143 * In case we must use restart_block to restart a futex_wait,
1197 * we encode in the 'flags' shared capability 1144 * we encode in the 'flags' shared capability
1198 */ 1145 */
1199#define FLAGS_SHARED 1 1146#define FLAGS_SHARED 0x01
1147#define FLAGS_CLOCKRT 0x02
1200 1148
1201static long futex_wait_restart(struct restart_block *restart); 1149static long futex_wait_restart(struct restart_block *restart);
1202 1150
1203static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1151static int futex_wait(u32 __user *uaddr, int fshared,
1204 u32 val, ktime_t *abs_time, u32 bitset) 1152 u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
1205{ 1153{
1206 struct task_struct *curr = current; 1154 struct task_struct *curr = current;
1207 DECLARE_WAITQUEUE(wait, curr); 1155 DECLARE_WAITQUEUE(wait, curr);
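
To make the widened encoding concrete: both capabilities of an interrupted futex_wait() now travel in the single restart-block flags word defined above. A minimal user-space sketch of the packing and unpacking (all names local to the example):

    #include <stdio.h>

    #define FLAGS_SHARED  0x01
    #define FLAGS_CLOCKRT 0x02

    int main(void)
    {
        unsigned int flags = 0;
        int fshared = 1, clockrt = 1;

        if (fshared)
            flags |= FLAGS_SHARED;      /* encoded on the way out */
        if (clockrt)
            flags |= FLAGS_CLOCKRT;

        /* decoded on restart, as futex_wait_restart() does below */
        printf("fshared=%d clockrt=%d\n",
               !!(flags & FLAGS_SHARED), !!(flags & FLAGS_CLOCKRT));
        return 0;
    }
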
@@ -1218,8 +1166,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1218 q.pi_state = NULL; 1166 q.pi_state = NULL;
1219 q.bitset = bitset; 1167 q.bitset = bitset;
1220 retry: 1168 retry:
1221 futex_lock_mm(fshared); 1169 q.key = FUTEX_KEY_INIT;
1222
1223 ret = get_futex_key(uaddr, fshared, &q.key); 1170 ret = get_futex_key(uaddr, fshared, &q.key);
1224 if (unlikely(ret != 0)) 1171 if (unlikely(ret != 0))
1225 goto out_release_sem; 1172 goto out_release_sem;
@@ -1251,12 +1198,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1251 if (unlikely(ret)) { 1198 if (unlikely(ret)) {
1252 queue_unlock(&q, hb); 1199 queue_unlock(&q, hb);
1253 1200
1254 /*
1255 * If we would have faulted, release mmap_sem, fault it in and
1256 * start all over again.
1257 */
1258 futex_unlock_mm(fshared);
1259
1260 ret = get_user(uval, uaddr); 1201 ret = get_user(uval, uaddr);
1261 1202
1262 if (!ret) 1203 if (!ret)
@@ -1271,12 +1212,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1271 queue_me(&q, hb); 1212 queue_me(&q, hb);
1272 1213
1273 /* 1214 /*
1274 * Now the futex is queued and we have checked the data, we
1275 * don't want to hold mmap_sem while we sleep.
1276 */
1277 futex_unlock_mm(fshared);
1278
1279 /*
1280 * There might have been scheduling since the queue_me(), as we 1215 * There might have been scheduling since the queue_me(), as we
1281 * cannot hold a spinlock across the get_user() in case it 1216 * cannot hold a spinlock across the get_user() in case it
1282 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1217 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1287,7 +1222,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1287 1222
1288 /* add_wait_queue is the barrier after __set_current_state. */ 1223 /* add_wait_queue is the barrier after __set_current_state. */
1289 __set_current_state(TASK_INTERRUPTIBLE); 1224 __set_current_state(TASK_INTERRUPTIBLE);
1290 add_wait_queue(&q.waiters, &wait); 1225 add_wait_queue(&q.waiter, &wait);
1291 /* 1226 /*
1292 * !plist_node_empty() is safe here without any lock. 1227 * !plist_node_empty() is safe here without any lock.
1293 * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 1228 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
@@ -1300,8 +1235,10 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1300 slack = current->timer_slack_ns; 1235 slack = current->timer_slack_ns;
1301 if (rt_task(current)) 1236 if (rt_task(current))
1302 slack = 0; 1237 slack = 0;
1303 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, 1238 hrtimer_init_on_stack(&t.timer,
1304 HRTIMER_MODE_ABS); 1239 clockrt ? CLOCK_REALTIME :
1240 CLOCK_MONOTONIC,
1241 HRTIMER_MODE_ABS);
1305 hrtimer_init_sleeper(&t, current); 1242 hrtimer_init_sleeper(&t, current);
1306 hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); 1243 hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
1307 1244
@@ -1356,6 +1293,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1356 1293
1357 if (fshared) 1294 if (fshared)
1358 restart->futex.flags |= FLAGS_SHARED; 1295 restart->futex.flags |= FLAGS_SHARED;
1296 if (clockrt)
1297 restart->futex.flags |= FLAGS_CLOCKRT;
1359 return -ERESTART_RESTARTBLOCK; 1298 return -ERESTART_RESTARTBLOCK;
1360 } 1299 }
1361 1300
@@ -1363,7 +1302,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1363 queue_unlock(&q, hb); 1302 queue_unlock(&q, hb);
1364 1303
1365 out_release_sem: 1304 out_release_sem:
1366 futex_unlock_mm(fshared); 1305 put_futex_key(fshared, &q.key);
1367 return ret; 1306 return ret;
1368} 1307}
1369 1308
@@ -1371,15 +1310,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1371static long futex_wait_restart(struct restart_block *restart) 1310static long futex_wait_restart(struct restart_block *restart)
1372{ 1311{
1373 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; 1312 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1374 struct rw_semaphore *fshared = NULL; 1313 int fshared = 0;
1375 ktime_t t; 1314 ktime_t t;
1376 1315
1377 t.tv64 = restart->futex.time; 1316 t.tv64 = restart->futex.time;
1378 restart->fn = do_no_restart_syscall; 1317 restart->fn = do_no_restart_syscall;
1379 if (restart->futex.flags & FLAGS_SHARED) 1318 if (restart->futex.flags & FLAGS_SHARED)
1380 fshared = &current->mm->mmap_sem; 1319 fshared = 1;
1381 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, 1320 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1382 restart->futex.bitset); 1321 restart->futex.bitset,
1322 restart->futex.flags & FLAGS_CLOCKRT);
1383} 1323}
1384 1324
1385 1325
@@ -1389,7 +1329,7 @@ static long futex_wait_restart(struct restart_block *restart)
1389 * if there are waiters then it will block, it does PI, etc. (Due to 1329 * if there are waiters then it will block, it does PI, etc. (Due to
1390 * races the kernel might see a 0 value of the futex too.) 1330 * races the kernel might see a 0 value of the futex too.)
1391 */ 1331 */
1392static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, 1332static int futex_lock_pi(u32 __user *uaddr, int fshared,
1393 int detect, ktime_t *time, int trylock) 1333 int detect, ktime_t *time, int trylock)
1394{ 1334{
1395 struct hrtimer_sleeper timeout, *to = NULL; 1335 struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1352,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1412 1352
1413 q.pi_state = NULL; 1353 q.pi_state = NULL;
1414 retry: 1354 retry:
1415 futex_lock_mm(fshared); 1355 q.key = FUTEX_KEY_INIT;
1416
1417 ret = get_futex_key(uaddr, fshared, &q.key); 1356 ret = get_futex_key(uaddr, fshared, &q.key);
1418 if (unlikely(ret != 0)) 1357 if (unlikely(ret != 0))
1419 goto out_release_sem; 1358 goto out_release_sem;
@@ -1502,7 +1441,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1502 * exit to complete. 1441 * exit to complete.
1503 */ 1442 */
1504 queue_unlock(&q, hb); 1443 queue_unlock(&q, hb);
1505 futex_unlock_mm(fshared);
1506 cond_resched(); 1444 cond_resched();
1507 goto retry; 1445 goto retry;
1508 1446
@@ -1534,12 +1472,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1534 */ 1472 */
1535 queue_me(&q, hb); 1473 queue_me(&q, hb);
1536 1474
1537 /*
1538 * Now the futex is queued and we have checked the data, we
1539 * don't want to hold mmap_sem while we sleep.
1540 */
1541 futex_unlock_mm(fshared);
1542
1543 WARN_ON(!q.pi_state); 1475 WARN_ON(!q.pi_state);
1544 /* 1476 /*
1545 * Block on the PI mutex: 1477 * Block on the PI mutex:
@@ -1552,7 +1484,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1552 ret = ret ? 0 : -EWOULDBLOCK; 1484 ret = ret ? 0 : -EWOULDBLOCK;
1553 } 1485 }
1554 1486
1555 futex_lock_mm(fshared);
1556 spin_lock(q.lock_ptr); 1487 spin_lock(q.lock_ptr);
1557 1488
1558 if (!ret) { 1489 if (!ret) {
@@ -1618,7 +1549,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1618 1549
1619 /* Unqueue and drop the lock */ 1550 /* Unqueue and drop the lock */
1620 unqueue_me_pi(&q); 1551 unqueue_me_pi(&q);
1621 futex_unlock_mm(fshared);
1622 1552
1623 if (to) 1553 if (to)
1624 destroy_hrtimer_on_stack(&to->timer); 1554 destroy_hrtimer_on_stack(&to->timer);
@@ -1628,34 +1558,30 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1628 queue_unlock(&q, hb); 1558 queue_unlock(&q, hb);
1629 1559
1630 out_release_sem: 1560 out_release_sem:
1631 futex_unlock_mm(fshared); 1561 put_futex_key(fshared, &q.key);
1632 if (to) 1562 if (to)
1633 destroy_hrtimer_on_stack(&to->timer); 1563 destroy_hrtimer_on_stack(&to->timer);
1634 return ret; 1564 return ret;
1635 1565
1636 uaddr_faulted: 1566 uaddr_faulted:
1637 /* 1567 /*
1638 * We have to r/w *(int __user *)uaddr, but we can't modify it 1568 * We have to r/w *(int __user *)uaddr, and we have to modify it
1639 * non-atomically. Therefore, if get_user below is not 1569 * atomically. Therefore, if we continue to fault after get_user()
1640 * enough, we need to handle the fault ourselves, while 1570 * below, we need to handle the fault ourselves, while still holding
1641 * still holding the mmap_sem. 1571 * the mmap_sem. This can occur if the uaddr is under contention as
1642 * 1572 * we have to drop the mmap_sem in order to call get_user().
1643 * ... and hb->lock. :-) --ANK
1644 */ 1573 */
1645 queue_unlock(&q, hb); 1574 queue_unlock(&q, hb);
1646 1575
1647 if (attempt++) { 1576 if (attempt++) {
1648 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1577 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1649 attempt);
1650 if (ret) 1578 if (ret)
1651 goto out_release_sem; 1579 goto out_release_sem;
1652 goto retry_unlocked; 1580 goto retry_unlocked;
1653 } 1581 }
1654 1582
1655 futex_unlock_mm(fshared);
1656
1657 ret = get_user(uval, uaddr); 1583 ret = get_user(uval, uaddr);
1658 if (!ret && (uval != -EFAULT)) 1584 if (!ret)
1659 goto retry; 1585 goto retry;
1660 1586
1661 if (to) 1587 if (to)
@@ -1668,13 +1594,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1668 * This is the in-kernel slowpath: we look up the PI state (if any), 1594 * This is the in-kernel slowpath: we look up the PI state (if any),
1669 * and do the rt-mutex unlock. 1595 * and do the rt-mutex unlock.
1670 */ 1596 */
1671static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) 1597static int futex_unlock_pi(u32 __user *uaddr, int fshared)
1672{ 1598{
1673 struct futex_hash_bucket *hb; 1599 struct futex_hash_bucket *hb;
1674 struct futex_q *this, *next; 1600 struct futex_q *this, *next;
1675 u32 uval; 1601 u32 uval;
1676 struct plist_head *head; 1602 struct plist_head *head;
1677 union futex_key key; 1603 union futex_key key = FUTEX_KEY_INIT;
1678 int ret, attempt = 0; 1604 int ret, attempt = 0;
1679 1605
1680retry: 1606retry:
@@ -1685,10 +1611,6 @@ retry:
1685 */ 1611 */
1686 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 1612 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
1687 return -EPERM; 1613 return -EPERM;
1688 /*
1689 * First take all the futex related locks:
1690 */
1691 futex_lock_mm(fshared);
1692 1614
1693 ret = get_futex_key(uaddr, fshared, &key); 1615 ret = get_futex_key(uaddr, fshared, &key);
1694 if (unlikely(ret != 0)) 1616 if (unlikely(ret != 0))
@@ -1747,34 +1669,30 @@ retry_unlocked:
1747out_unlock: 1669out_unlock:
1748 spin_unlock(&hb->lock); 1670 spin_unlock(&hb->lock);
1749out: 1671out:
1750 futex_unlock_mm(fshared); 1672 put_futex_key(fshared, &key);
1751 1673
1752 return ret; 1674 return ret;
1753 1675
1754pi_faulted: 1676pi_faulted:
1755 /* 1677 /*
1756 * We have to r/w *(int __user *)uaddr, but we can't modify it 1678 * We have to r/w *(int __user *)uaddr, and we have to modify it
1757 * non-atomically. Therefore, if get_user below is not 1679 * atomically. Therefore, if we continue to fault after get_user()
1758 * enough, we need to handle the fault ourselves, while 1680 * below, we need to handle the fault ourselves, while still holding
1759 * still holding the mmap_sem. 1681 * the mmap_sem. This can occur if the uaddr is under contention as
1760 * 1682 * we have to drop the mmap_sem in order to call get_user().
1761 * ... and hb->lock. --ANK
1762 */ 1683 */
1763 spin_unlock(&hb->lock); 1684 spin_unlock(&hb->lock);
1764 1685
1765 if (attempt++) { 1686 if (attempt++) {
1766 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1687 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1767 attempt);
1768 if (ret) 1688 if (ret)
1769 goto out; 1689 goto out;
1770 uval = 0; 1690 uval = 0;
1771 goto retry_unlocked; 1691 goto retry_unlocked;
1772 } 1692 }
1773 1693
1774 futex_unlock_mm(fshared);
1775
1776 ret = get_user(uval, uaddr); 1694 ret = get_user(uval, uaddr);
1777 if (!ret && (uval != -EFAULT)) 1695 if (!ret)
1778 goto retry; 1696 goto retry;
1779 1697
1780 return ret; 1698 return ret;
@@ -1898,8 +1816,7 @@ retry:
1898 * PI futexes happens in exit_pi_state(): 1816 * PI futexes happens in exit_pi_state():
1899 */ 1817 */
1900 if (!pi && (uval & FUTEX_WAITERS)) 1818 if (!pi && (uval & FUTEX_WAITERS))
1901 futex_wake(uaddr, &curr->mm->mmap_sem, 1, 1819 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
1902 FUTEX_BITSET_MATCH_ANY);
1903 } 1820 }
1904 return 0; 1821 return 0;
1905} 1822}
@@ -1993,18 +1910,22 @@ void exit_robust_list(struct task_struct *curr)
1993long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 1910long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1994 u32 __user *uaddr2, u32 val2, u32 val3) 1911 u32 __user *uaddr2, u32 val2, u32 val3)
1995{ 1912{
1996 int ret = -ENOSYS; 1913 int clockrt, ret = -ENOSYS;
1997 int cmd = op & FUTEX_CMD_MASK; 1914 int cmd = op & FUTEX_CMD_MASK;
1998 struct rw_semaphore *fshared = NULL; 1915 int fshared = 0;
1999 1916
2000 if (!(op & FUTEX_PRIVATE_FLAG)) 1917 if (!(op & FUTEX_PRIVATE_FLAG))
2001 fshared = &current->mm->mmap_sem; 1918 fshared = 1;
1919
1920 clockrt = op & FUTEX_CLOCK_REALTIME;
1921 if (clockrt && cmd != FUTEX_WAIT_BITSET)
1922 return -ENOSYS;
2002 1923
2003 switch (cmd) { 1924 switch (cmd) {
2004 case FUTEX_WAIT: 1925 case FUTEX_WAIT:
2005 val3 = FUTEX_BITSET_MATCH_ANY; 1926 val3 = FUTEX_BITSET_MATCH_ANY;
2006 case FUTEX_WAIT_BITSET: 1927 case FUTEX_WAIT_BITSET:
2007 ret = futex_wait(uaddr, fshared, val, timeout, val3); 1928 ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
2008 break; 1929 break;
2009 case FUTEX_WAKE: 1930 case FUTEX_WAKE:
2010 val3 = FUTEX_BITSET_MATCH_ANY; 1931 val3 = FUTEX_BITSET_MATCH_ANY;
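
A hedged user-space sketch of exercising the new flag through the raw syscall; the #ifndef fallbacks are assumed values for headers that predate this change, and older glibc needs -lrt for clock_gettime():

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include <errno.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/futex.h>

    #ifndef FUTEX_WAIT_BITSET
    #define FUTEX_WAIT_BITSET 9             /* assumed for old headers */
    #endif
    #ifndef FUTEX_CLOCK_REALTIME
    #define FUTEX_CLOCK_REALTIME 256        /* new in this change */
    #endif
    #ifndef FUTEX_BITSET_MATCH_ANY
    #define FUTEX_BITSET_MATCH_ANY 0xffffffff
    #endif

    int main(void)
    {
        uint32_t uval = 0;
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_sec += 1;                 /* absolute CLOCK_REALTIME deadline */

        /* The flag is honoured only for FUTEX_WAIT_BITSET; any other
         * command combined with it now fails with ENOSYS. */
        if (syscall(SYS_futex, &uval,
                    FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME,
                    0, &ts, NULL, FUTEX_BITSET_MATCH_ANY) == -1)
            printf("futex: %s\n", strerror(errno)); /* ETIMEDOUT here */
        return 0;
    }
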
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 801addda3c43..e9d1c8205a3b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -673,6 +673,18 @@ int request_irq(unsigned int irq, irq_handler_t handler,
673 struct irq_desc *desc; 673 struct irq_desc *desc;
674 int retval; 674 int retval;
675 675
676 /*
677 * handle_IRQ_event() always ignores IRQF_DISABLED except for
678 * the _first_ irqaction (sigh). That can cause oopsing, but
679 * the behavior is classified as "will not fix" so we need to
680 * start nudging drivers away from using that idiom.
681 */
682 if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
683 == (IRQF_SHARED|IRQF_DISABLED))
684 pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
685 "guaranteed on shared IRQs\n",
686 irq, devname);
687
676#ifdef CONFIG_LOCKDEP 688#ifdef CONFIG_LOCKDEP
677 /* 689 /*
678 * Lockdep wants atomic interrupt handlers: 690 * Lockdep wants atomic interrupt handlers:
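
For illustration, the idiom this new warning targets looks like the following hypothetical driver fragment; the handler, device name, and the IRQ number in the logged line are invented:

    /* Asks for IRQF_DISABLED on a shared line, which
     * handle_IRQ_event() honours only for the first sharer. */
    ret = request_irq(irq, my_handler, IRQF_SHARED | IRQF_DISABLED,
                      "mydev", dev);
    /* The request still succeeds, but the kernel now logs:
     *   IRQ 17/mydev: IRQF_DISABLED is not guaranteed on shared IRQs */
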
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 46a404173db2..4fa6eeb4e8a7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
136#ifdef CONFIG_LOCK_STAT 136#ifdef CONFIG_LOCK_STAT
137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
138 138
139static int lock_contention_point(struct lock_class *class, unsigned long ip) 139static int lock_point(unsigned long points[], unsigned long ip)
140{ 140{
141 int i; 141 int i;
142 142
143 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 143 for (i = 0; i < LOCKSTAT_POINTS; i++) {
144 if (class->contention_point[i] == 0) { 144 if (points[i] == 0) {
145 class->contention_point[i] = ip; 145 points[i] = ip;
146 break; 146 break;
147 } 147 }
148 if (class->contention_point[i] == ip) 148 if (points[i] == ip)
149 break; 149 break;
150 } 150 }
151 151
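
Read on its own, the generalized helper is a first-fit-or-match scan over a small fixed table. A standalone model, adding the trailing return that falls outside the hunk:

    #include <stdio.h>

    #define LOCKSTAT_POINTS 4

    static int lock_point(unsigned long points[], unsigned long ip)
    {
        int i;

        for (i = 0; i < LOCKSTAT_POINTS; i++) {
            if (points[i] == 0) {       /* free slot: record this ip */
                points[i] = ip;
                break;
            }
            if (points[i] == ip)        /* already recorded */
                break;
        }
        return i;       /* == LOCKSTAT_POINTS when the table is full */
    }

    int main(void)
    {
        unsigned long pts[LOCKSTAT_POINTS] = { 0 };
        int a = lock_point(pts, 0xa0);  /* 0: new entry */
        int b = lock_point(pts, 0xb0);  /* 1: new entry */
        int c = lock_point(pts, 0xa0);  /* 0: matches existing */

        printf("%d %d %d\n", a, b, c);
        return 0;
    }
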
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
185 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) 185 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
186 stats.contention_point[i] += pcs->contention_point[i]; 186 stats.contention_point[i] += pcs->contention_point[i];
187 187
188 for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
189 stats.contending_point[i] += pcs->contending_point[i];
190
188 lock_time_add(&pcs->read_waittime, &stats.read_waittime); 191 lock_time_add(&pcs->read_waittime, &stats.read_waittime);
189 lock_time_add(&pcs->write_waittime, &stats.write_waittime); 192 lock_time_add(&pcs->write_waittime, &stats.write_waittime);
190 193
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
209 memset(cpu_stats, 0, sizeof(struct lock_class_stats)); 212 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
210 } 213 }
211 memset(class->contention_point, 0, sizeof(class->contention_point)); 214 memset(class->contention_point, 0, sizeof(class->contention_point));
215 memset(class->contending_point, 0, sizeof(class->contending_point));
212} 216}
213 217
214static struct lock_class_stats *get_lock_stats(struct lock_class *class) 218static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -287,14 +291,12 @@ void lockdep_off(void)
287{ 291{
288 current->lockdep_recursion++; 292 current->lockdep_recursion++;
289} 293}
290
291EXPORT_SYMBOL(lockdep_off); 294EXPORT_SYMBOL(lockdep_off);
292 295
293void lockdep_on(void) 296void lockdep_on(void)
294{ 297{
295 current->lockdep_recursion--; 298 current->lockdep_recursion--;
296} 299}
297
298EXPORT_SYMBOL(lockdep_on); 300EXPORT_SYMBOL(lockdep_on);
299 301
300/* 302/*
@@ -576,7 +578,8 @@ static void print_lock_class_header(struct lock_class *class, int depth)
576/* 578/*
577 * printk all lock dependencies starting at <entry>: 579 * printk all lock dependencies starting at <entry>:
578 */ 580 */
579static void print_lock_dependencies(struct lock_class *class, int depth) 581static void __used
582print_lock_dependencies(struct lock_class *class, int depth)
580{ 583{
581 struct lock_list *entry; 584 struct lock_list *entry;
582 585
@@ -2508,7 +2511,6 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
2508 if (subclass) 2511 if (subclass)
2509 register_lock_class(lock, subclass, 1); 2512 register_lock_class(lock, subclass, 1);
2510} 2513}
2511
2512EXPORT_SYMBOL_GPL(lockdep_init_map); 2514EXPORT_SYMBOL_GPL(lockdep_init_map);
2513 2515
2514/* 2516/*
@@ -2689,8 +2691,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2689} 2691}
2690 2692
2691static int 2693static int
2692__lock_set_subclass(struct lockdep_map *lock, 2694__lock_set_class(struct lockdep_map *lock, const char *name,
2693 unsigned int subclass, unsigned long ip) 2695 struct lock_class_key *key, unsigned int subclass,
2696 unsigned long ip)
2694{ 2697{
2695 struct task_struct *curr = current; 2698 struct task_struct *curr = current;
2696 struct held_lock *hlock, *prev_hlock; 2699 struct held_lock *hlock, *prev_hlock;
@@ -2717,6 +2720,7 @@ __lock_set_subclass(struct lockdep_map *lock,
2717 return print_unlock_inbalance_bug(curr, lock, ip); 2720 return print_unlock_inbalance_bug(curr, lock, ip);
2718 2721
2719found_it: 2722found_it:
2723 lockdep_init_map(lock, name, key, 0);
2720 class = register_lock_class(lock, subclass, 0); 2724 class = register_lock_class(lock, subclass, 0);
2721 hlock->class_idx = class - lock_classes + 1; 2725 hlock->class_idx = class - lock_classes + 1;
2722 2726
@@ -2901,9 +2905,9 @@ static void check_flags(unsigned long flags)
2901#endif 2905#endif
2902} 2906}
2903 2907
2904void 2908void lock_set_class(struct lockdep_map *lock, const char *name,
2905lock_set_subclass(struct lockdep_map *lock, 2909 struct lock_class_key *key, unsigned int subclass,
2906 unsigned int subclass, unsigned long ip) 2910 unsigned long ip)
2907{ 2911{
2908 unsigned long flags; 2912 unsigned long flags;
2909 2913
@@ -2913,13 +2917,12 @@ lock_set_subclass(struct lockdep_map *lock,
2913 raw_local_irq_save(flags); 2917 raw_local_irq_save(flags);
2914 current->lockdep_recursion = 1; 2918 current->lockdep_recursion = 1;
2915 check_flags(flags); 2919 check_flags(flags);
2916 if (__lock_set_subclass(lock, subclass, ip)) 2920 if (__lock_set_class(lock, name, key, subclass, ip))
2917 check_chain_key(current); 2921 check_chain_key(current);
2918 current->lockdep_recursion = 0; 2922 current->lockdep_recursion = 0;
2919 raw_local_irq_restore(flags); 2923 raw_local_irq_restore(flags);
2920} 2924}
2921 2925EXPORT_SYMBOL_GPL(lock_set_class);
2922EXPORT_SYMBOL_GPL(lock_set_subclass);
2923 2926
2924/* 2927/*
2925 * We are not always called with irqs disabled - do that here, 2928 * We are not always called with irqs disabled - do that here,
@@ -2943,7 +2946,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2943 current->lockdep_recursion = 0; 2946 current->lockdep_recursion = 0;
2944 raw_local_irq_restore(flags); 2947 raw_local_irq_restore(flags);
2945} 2948}
2946
2947EXPORT_SYMBOL_GPL(lock_acquire); 2949EXPORT_SYMBOL_GPL(lock_acquire);
2948 2950
2949void lock_release(struct lockdep_map *lock, int nested, 2951void lock_release(struct lockdep_map *lock, int nested,
@@ -2961,7 +2963,6 @@ void lock_release(struct lockdep_map *lock, int nested,
2961 current->lockdep_recursion = 0; 2963 current->lockdep_recursion = 0;
2962 raw_local_irq_restore(flags); 2964 raw_local_irq_restore(flags);
2963} 2965}
2964
2965EXPORT_SYMBOL_GPL(lock_release); 2966EXPORT_SYMBOL_GPL(lock_release);
2966 2967
2967#ifdef CONFIG_LOCK_STAT 2968#ifdef CONFIG_LOCK_STAT
@@ -2999,7 +3000,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
2999 struct held_lock *hlock, *prev_hlock; 3000 struct held_lock *hlock, *prev_hlock;
3000 struct lock_class_stats *stats; 3001 struct lock_class_stats *stats;
3001 unsigned int depth; 3002 unsigned int depth;
3002 int i, point; 3003 int i, contention_point, contending_point;
3003 3004
3004 depth = curr->lockdep_depth; 3005 depth = curr->lockdep_depth;
3005 if (DEBUG_LOCKS_WARN_ON(!depth)) 3006 if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3023,18 +3024,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
3023found_it: 3024found_it:
3024 hlock->waittime_stamp = sched_clock(); 3025 hlock->waittime_stamp = sched_clock();
3025 3026
3026 point = lock_contention_point(hlock_class(hlock), ip); 3027 contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
3028 contending_point = lock_point(hlock_class(hlock)->contending_point,
3029 lock->ip);
3027 3030
3028 stats = get_lock_stats(hlock_class(hlock)); 3031 stats = get_lock_stats(hlock_class(hlock));
3029 if (point < ARRAY_SIZE(stats->contention_point)) 3032 if (contention_point < LOCKSTAT_POINTS)
3030 stats->contention_point[point]++; 3033 stats->contention_point[contention_point]++;
3034 if (contending_point < LOCKSTAT_POINTS)
3035 stats->contending_point[contending_point]++;
3031 if (lock->cpu != smp_processor_id()) 3036 if (lock->cpu != smp_processor_id())
3032 stats->bounces[bounce_contended + !!hlock->read]++; 3037 stats->bounces[bounce_contended + !!hlock->read]++;
3033 put_lock_stats(stats); 3038 put_lock_stats(stats);
3034} 3039}
3035 3040
3036static void 3041static void
3037__lock_acquired(struct lockdep_map *lock) 3042__lock_acquired(struct lockdep_map *lock, unsigned long ip)
3038{ 3043{
3039 struct task_struct *curr = current; 3044 struct task_struct *curr = current;
3040 struct held_lock *hlock, *prev_hlock; 3045 struct held_lock *hlock, *prev_hlock;
@@ -3083,6 +3088,7 @@ found_it:
3083 put_lock_stats(stats); 3088 put_lock_stats(stats);
3084 3089
3085 lock->cpu = cpu; 3090 lock->cpu = cpu;
3091 lock->ip = ip;
3086} 3092}
3087 3093
3088void lock_contended(struct lockdep_map *lock, unsigned long ip) 3094void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3104,7 +3110,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3104} 3110}
3105EXPORT_SYMBOL_GPL(lock_contended); 3111EXPORT_SYMBOL_GPL(lock_contended);
3106 3112
3107void lock_acquired(struct lockdep_map *lock) 3113void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3108{ 3114{
3109 unsigned long flags; 3115 unsigned long flags;
3110 3116
@@ -3117,7 +3123,7 @@ void lock_acquired(struct lockdep_map *lock)
3117 raw_local_irq_save(flags); 3123 raw_local_irq_save(flags);
3118 check_flags(flags); 3124 check_flags(flags);
3119 current->lockdep_recursion = 1; 3125 current->lockdep_recursion = 1;
3120 __lock_acquired(lock); 3126 __lock_acquired(lock, ip);
3121 current->lockdep_recursion = 0; 3127 current->lockdep_recursion = 0;
3122 raw_local_irq_restore(flags); 3128 raw_local_irq_restore(flags);
3123} 3129}
@@ -3441,7 +3447,6 @@ retry:
3441 if (unlock) 3447 if (unlock)
3442 read_unlock(&tasklist_lock); 3448 read_unlock(&tasklist_lock);
3443} 3449}
3444
3445EXPORT_SYMBOL_GPL(debug_show_all_locks); 3450EXPORT_SYMBOL_GPL(debug_show_all_locks);
3446 3451
3447/* 3452/*
@@ -3462,7 +3467,6 @@ void debug_show_held_locks(struct task_struct *task)
3462{ 3467{
3463 __debug_show_held_locks(task); 3468 __debug_show_held_locks(task);
3464} 3469}
3465
3466EXPORT_SYMBOL_GPL(debug_show_held_locks); 3470EXPORT_SYMBOL_GPL(debug_show_held_locks);
3467 3471
3468void lockdep_sys_exit(void) 3472void lockdep_sys_exit(void)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf9c7dd..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
470 470
471static void snprint_time(char *buf, size_t bufsiz, s64 nr) 471static void snprint_time(char *buf, size_t bufsiz, s64 nr)
472{ 472{
473 unsigned long rem; 473 s64 div;
474 s32 rem;
474 475
475 nr += 5; /* for display rounding */ 476 nr += 5; /* for display rounding */
476 rem = do_div(nr, 1000); /* XXX: do_div_signed */ 477 div = div_s64_rem(nr, 1000, &rem);
477 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); 478 snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
478} 479}
479 480
480static void seq_time(struct seq_file *m, s64 time) 481static void seq_time(struct seq_file *m, s64 time)
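
The point of the change is that do_div() is unsigned-only, so negative s64 times were mangled, while div_s64_rem() divides signed values correctly. A user-space stand-in, with the kernel helper reimplemented here since it normally comes from linux/math64.h:

    #include <stdio.h>
    #include <stdint.h>

    /* stand-in for the kernel's div_s64_rem() */
    static int64_t div_s64_rem(int64_t dividend, int32_t divisor,
                               int32_t *rem)
    {
        *rem = (int32_t)(dividend % divisor);
        return dividend / divisor;
    }

    int main(void)
    {
        char buf[32];
        int64_t nr = 1234567;           /* nanoseconds */
        int64_t div;
        int32_t rem;

        nr += 5;                        /* display rounding */
        div = div_s64_rem(nr, 1000, &rem);
        snprintf(buf, sizeof(buf), "%lld.%02d",
                 (long long)div, (int)rem / 10);
        printf("%s\n", buf);            /* prints 1234.57 */
        return 0;
    }
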
@@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
556 if (stats->read_holdtime.nr) 557 if (stats->read_holdtime.nr)
557 namelen += 2; 558 namelen += 2;
558 559
559 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 560 for (i = 0; i < LOCKSTAT_POINTS; i++) {
560 char sym[KSYM_SYMBOL_LEN]; 561 char sym[KSYM_SYMBOL_LEN];
561 char ip[32]; 562 char ip[32];
562 563
@@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
573 stats->contention_point[i], 574 stats->contention_point[i],
574 ip, sym); 575 ip, sym);
575 } 576 }
577 for (i = 0; i < LOCKSTAT_POINTS; i++) {
578 char sym[KSYM_SYMBOL_LEN];
579 char ip[32];
580
581 if (class->contending_point[i] == 0)
582 break;
583
584 if (!i)
585 seq_line(m, '-', 40-namelen, namelen);
586
587 sprint_symbol(sym, class->contending_point[i]);
588 snprintf(ip, sizeof(ip), "[<%p>]",
589 (void *)class->contending_point[i]);
590 seq_printf(m, "%40s %14lu %29s %s\n", name,
591 stats->contending_point[i],
592 ip, sym);
593 }
576 if (i) { 594 if (i) {
577 seq_puts(m, "\n"); 595 seq_puts(m, "\n");
578 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); 596 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
582 600
583static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
584{ 602{
585 seq_printf(m, "lock_stat version 0.2\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
586 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
587 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
588 "%14s %14s\n", 606 "%14s %14s\n",
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..4f45d4b658ef 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
59 * We also put the fastpath first in the kernel image, to make sure the 59 * We also put the fastpath first in the kernel image, to make sure the
60 * branch is predicted by the CPU as default-untaken. 60 * branch is predicted by the CPU as default-untaken.
61 */ 61 */
62static void noinline __sched 62static __used noinline void __sched
63__mutex_lock_slowpath(atomic_t *lock_count); 63__mutex_lock_slowpath(atomic_t *lock_count);
64 64
65/*** 65/***
@@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
96EXPORT_SYMBOL(mutex_lock); 96EXPORT_SYMBOL(mutex_lock);
97#endif 97#endif
98 98
99static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 99static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
100 100
101/*** 101/***
102 * mutex_unlock - release the mutex 102 * mutex_unlock - release the mutex
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
184 } 184 }
185 185
186done: 186done:
187 lock_acquired(&lock->dep_map); 187 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 188 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 189 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
190 debug_mutex_set_owner(lock, task_thread_info(task)); 190 debug_mutex_set_owner(lock, task_thread_info(task));
@@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
268/* 268/*
269 * Release the lock, slowpath: 269 * Release the lock, slowpath:
270 */ 270 */
271static noinline void 271static __used noinline void
272__mutex_unlock_slowpath(atomic_t *lock_count) 272__mutex_unlock_slowpath(atomic_t *lock_count)
273{ 273{
274 __mutex_unlock_common_slowpath(lock_count, 1); 274 __mutex_unlock_common_slowpath(lock_count, 1);
@@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
313} 313}
314EXPORT_SYMBOL(mutex_lock_killable); 314EXPORT_SYMBOL(mutex_lock_killable);
315 315
316static noinline void __sched 316static __used noinline void __sched
317__mutex_lock_slowpath(atomic_t *lock_count) 317__mutex_lock_slowpath(atomic_t *lock_count)
318{ 318{
319 struct mutex *lock = container_of(lock_count, struct mutex, count); 319 struct mutex *lock = container_of(lock_count, struct mutex, count);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4282c0a40a57..61d5aa5eced3 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
82 82
83 while (nb && nr_to_call) { 83 while (nb && nr_to_call) {
84 next_nb = rcu_dereference(nb->next); 84 next_nb = rcu_dereference(nb->next);
85
86#ifdef CONFIG_DEBUG_NOTIFIERS
87 if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
88 WARN(1, "Invalid notifier called!");
89 nb = next_nb;
90 continue;
91 }
92#endif
85 ret = nb->notifier_call(nb, val, v); 93 ret = nb->notifier_call(nb, val, v);
86 94
87 if (nr_calls) 95 if (nr_calls)
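
The bug class this catches is a notifier whose callback no longer points at kernel text. A hypothetical offender, sketched as a module that forgets to unregister:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/reboot.h>

    static int my_cb(struct notifier_block *nb, unsigned long action,
                     void *data)
    {
        return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = { .notifier_call = my_cb };

    static int __init my_init(void)
    {
        return register_reboot_notifier(&my_nb);
    }

    static void __exit my_exit(void)
    {
        /* BUG: unregister_reboot_notifier(&my_nb) is missing.  After
         * rmmod, my_cb is freed text; with CONFIG_DEBUG_NOTIFIERS the
         * chain walk above now WARNs and skips the entry instead of
         * jumping into freed memory. */
    }
    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");
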
diff --git a/kernel/panic.c b/kernel/panic.c
index 4d5088355bfe..13f06349a786 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -21,6 +21,7 @@
21#include <linux/debug_locks.h> 21#include <linux/debug_locks.h>
22#include <linux/random.h> 22#include <linux/random.h>
23#include <linux/kallsyms.h> 23#include <linux/kallsyms.h>
24#include <linux/dmi.h>
24 25
25int panic_on_oops; 26int panic_on_oops;
26static unsigned long tainted_mask; 27static unsigned long tainted_mask;
@@ -321,36 +322,27 @@ void oops_exit(void)
321} 322}
322 323
323#ifdef WANT_WARN_ON_SLOWPATH 324#ifdef WANT_WARN_ON_SLOWPATH
324void warn_on_slowpath(const char *file, int line)
325{
326 char function[KSYM_SYMBOL_LEN];
327 unsigned long caller = (unsigned long) __builtin_return_address(0);
328 sprint_symbol(function, caller);
329
330 printk(KERN_WARNING "------------[ cut here ]------------\n");
331 printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
332 line, function);
333 print_modules();
334 dump_stack();
335 print_oops_end_marker();
336 add_taint(TAINT_WARN);
337}
338EXPORT_SYMBOL(warn_on_slowpath);
339
340
341void warn_slowpath(const char *file, int line, const char *fmt, ...) 325void warn_slowpath(const char *file, int line, const char *fmt, ...)
342{ 326{
343 va_list args; 327 va_list args;
344 char function[KSYM_SYMBOL_LEN]; 328 char function[KSYM_SYMBOL_LEN];
345 unsigned long caller = (unsigned long)__builtin_return_address(0); 329 unsigned long caller = (unsigned long)__builtin_return_address(0);
330 const char *board;
331
346 sprint_symbol(function, caller); 332 sprint_symbol(function, caller);
347 333
348 printk(KERN_WARNING "------------[ cut here ]------------\n"); 334 printk(KERN_WARNING "------------[ cut here ]------------\n");
349 printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, 335 printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
350 line, function); 336 line, function);
351 va_start(args, fmt); 337 board = dmi_get_system_info(DMI_PRODUCT_NAME);
352 vprintk(fmt, args); 338 if (board)
353 va_end(args); 339 printk(KERN_WARNING "Hardware name: %s\n", board);
340
341 if (fmt) {
342 va_start(args, fmt);
343 vprintk(fmt, args);
344 va_end(args);
345 }
354 346
355 print_modules(); 347 print_modules();
356 dump_stack(); 348 dump_stack();
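
With the unformatted warn_on_slowpath() gone, every WARN flavor funnels through warn_slowpath() (a NULL fmt covers plain WARN_ON), and reports gain a hardware line. A hypothetical call with invented output:

    if (WARN(dev == NULL, "foo: probe raced with remove\n"))
        return -ENODEV;

    /* Now printed, in addition to the backtrace:
     *   ------------[ cut here ]------------
     *   WARNING: at drivers/foo/foo.c:42 foo_probe()
     *   Hardware name: ExampleBoard 1.0
     *   foo: probe raced with remove
     */
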
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 4e5288a831de..157de3a47832 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -58,21 +58,21 @@ void thread_group_cputime(
58 struct task_struct *tsk, 58 struct task_struct *tsk,
59 struct task_cputime *times) 59 struct task_cputime *times)
60{ 60{
61 struct signal_struct *sig; 61 struct task_cputime *totals, *tot;
62 int i; 62 int i;
63 struct task_cputime *tot;
64 63
65 sig = tsk->signal; 64 totals = tsk->signal->cputime.totals;
66 if (unlikely(!sig) || !sig->cputime.totals) { 65 if (!totals) {
67 times->utime = tsk->utime; 66 times->utime = tsk->utime;
68 times->stime = tsk->stime; 67 times->stime = tsk->stime;
69 times->sum_exec_runtime = tsk->se.sum_exec_runtime; 68 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
70 return; 69 return;
71 } 70 }
71
72 times->stime = times->utime = cputime_zero; 72 times->stime = times->utime = cputime_zero;
73 times->sum_exec_runtime = 0; 73 times->sum_exec_runtime = 0;
74 for_each_possible_cpu(i) { 74 for_each_possible_cpu(i) {
75 tot = per_cpu_ptr(tsk->signal->cputime.totals, i); 75 tot = per_cpu_ptr(totals, i);
76 times->utime = cputime_add(times->utime, tot->utime); 76 times->utime = cputime_add(times->utime, tot->utime);
77 times->stime = cputime_add(times->stime, tot->stime); 77 times->stime = cputime_add(times->stime, tot->stime);
78 times->sum_exec_runtime += tot->sum_exec_runtime; 78 times->sum_exec_runtime += tot->sum_exec_runtime;
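
Besides dropping a local, the rewrite loads tsk->signal->cputime.totals exactly once, so the NULL check and the later per-CPU walk cannot see different pointer values. The shape of that read-once fix, modeled standalone:

    #include <stdio.h>

    struct task_cputime { long utime, stime, sum_exec_runtime; };

    static struct task_cputime *shared_totals;  /* others may clear this */

    static void get_times(struct task_cputime *times)
    {
        struct task_cputime *totals = shared_totals;    /* load once */

        if (!totals) {          /* fallback: no per-CPU accounting yet */
            times->utime = times->stime = 0;
            times->sum_exec_runtime = 0;
            return;
        }
        /* The walk uses the same pointer value the NULL check saw,
         * so a concurrent clear cannot slip in between. */
        *times = *totals;
    }

    int main(void)
    {
        struct task_cputime t;

        get_times(&t);
        printf("run=%ld\n", t.sum_exec_runtime);
        return 0;
    }
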
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5e79c662294b..a140e44eebba 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -197,6 +197,11 @@ static int common_timer_create(struct k_itimer *new_timer)
197 return 0; 197 return 0;
198} 198}
199 199
200static int no_timer_create(struct k_itimer *new_timer)
201{
202 return -EOPNOTSUPP;
203}
204
200/* 205/*
201 * Return nonzero if we know a priori this clockid_t value is bogus. 206 * Return nonzero if we know a priori this clockid_t value is bogus.
202 */ 207 */
@@ -248,6 +253,7 @@ static __init int init_posix_timers(void)
248 .clock_getres = hrtimer_get_res, 253 .clock_getres = hrtimer_get_res,
249 .clock_get = posix_get_monotonic_raw, 254 .clock_get = posix_get_monotonic_raw,
250 .clock_set = do_posix_clock_nosettime, 255 .clock_set = do_posix_clock_nosettime,
256 .timer_create = no_timer_create,
251 }; 257 };
252 258
253 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 259 register_posix_clock(CLOCK_REALTIME, &clock_realtime);
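
Observable from user space: timer_create() against CLOCK_MONOTONIC_RAW now fails cleanly instead of oopsing on the missing hook. A hedged sketch, where the fallback clockid value is an assumption for older headers:

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <signal.h>
    #include <time.h>

    #ifndef CLOCK_MONOTONIC_RAW
    #define CLOCK_MONOTONIC_RAW 4   /* assumed value from linux headers */
    #endif

    int main(void)                  /* link with -lrt on older glibc */
    {
        timer_t tid;

        /* CLOCK_MONOTONIC_RAW supports clock_gettime() but not timers;
         * the new no_timer_create() hook returns a clean error. */
        if (timer_create(CLOCK_MONOTONIC_RAW, NULL, &tid) == -1)
            printf("timer_create: %s\n", strerror(errno)); /* EOPNOTSUPP */
        return 0;
    }
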
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..e503a002f330 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
191 191
192 /* OK, time to rat on our buddy... */ 192 /* OK, time to rat on our buddy... */
193 193
194 printk(KERN_ERR "RCU detected CPU stalls:"); 194 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
195 for_each_possible_cpu(cpu) { 195 for_each_possible_cpu(cpu) {
196 if (cpu_isset(cpu, rcp->cpumask)) 196 if (cpu_isset(cpu, rcp->cpumask))
197 printk(" %d", cpu); 197 printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
204{ 204{
205 unsigned long flags; 205 unsigned long flags;
206 206
207 printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", 207 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
208 smp_processor_id(), jiffies, 208 smp_processor_id(), jiffies,
209 jiffies - rcp->gp_start); 209 jiffies - rcp->gp_start);
210 dump_stack(); 210 dump_stack();
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 59236e8b9daa..04982659875a 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -551,6 +551,16 @@ void rcu_irq_exit(void)
551 } 551 }
552} 552}
553 553
554void rcu_nmi_enter(void)
555{
556 rcu_irq_enter();
557}
558
559void rcu_nmi_exit(void)
560{
561 rcu_irq_exit();
562}
563
554static void dyntick_save_progress_counter(int cpu) 564static void dyntick_save_progress_counter(int cpu)
555{ 565{
556 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); 566 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 35c2d3360ecf..7c2665cac172 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
149 sp->done_length += cp->done_length; 149 sp->done_length += cp->done_length;
150 sp->done_add += cp->done_add; 150 sp->done_add += cp->done_add;
151 sp->done_remove += cp->done_remove; 151 sp->done_remove += cp->done_remove;
152 atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); 152 atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked);
153 sp->rcu_check_callbacks += cp->rcu_check_callbacks; 153 sp->rcu_check_callbacks += cp->rcu_check_callbacks;
154 atomic_set(&sp->rcu_try_flip_1, 154 atomic_add(atomic_read(&cp->rcu_try_flip_1),
155 atomic_read(&cp->rcu_try_flip_1)); 155 &sp->rcu_try_flip_1);
156 atomic_set(&sp->rcu_try_flip_e1, 156 atomic_add(atomic_read(&cp->rcu_try_flip_e1),
157 atomic_read(&cp->rcu_try_flip_e1)); 157 &sp->rcu_try_flip_e1);
158 sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; 158 sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
159 sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; 159 sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
160 sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; 160 sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 85cb90588a55..b31065522104 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -39,6 +39,7 @@
39#include <linux/moduleparam.h> 39#include <linux/moduleparam.h>
40#include <linux/percpu.h> 40#include <linux/percpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/reboot.h>
42#include <linux/freezer.h> 43#include <linux/freezer.h>
43#include <linux/cpu.h> 44#include <linux/cpu.h>
44#include <linux/delay.h> 45#include <linux/delay.h>
@@ -108,7 +109,6 @@ struct rcu_torture {
108 int rtort_mbtest; 109 int rtort_mbtest;
109}; 110};
110 111
111static int fullstop = 0; /* stop generating callbacks at test end. */
112static LIST_HEAD(rcu_torture_freelist); 112static LIST_HEAD(rcu_torture_freelist);
113static struct rcu_torture *rcu_torture_current = NULL; 113static struct rcu_torture *rcu_torture_current = NULL;
114static long rcu_torture_current_version = 0; 114static long rcu_torture_current_version = 0;
@@ -136,6 +136,30 @@ static int stutter_pause_test = 0;
136#endif 136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138 138
139#define FULLSTOP_SIGNALED 1 /* Bail due to signal. */
140#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */
141static int fullstop; /* stop generating callbacks at test end. */
142DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */
143 /* spawning of kthreads. */
144
145/*
146 * Detect and respond to a signal-based shutdown.
147 */
148static int
149rcutorture_shutdown_notify(struct notifier_block *unused1,
150 unsigned long unused2, void *unused3)
151{
152 if (fullstop)
153 return NOTIFY_DONE;
154 if (signal_pending(current)) {
155 mutex_lock(&fullstop_mutex);
156 if (!ACCESS_ONCE(fullstop))
157 fullstop = FULLSTOP_SIGNALED;
158 mutex_unlock(&fullstop_mutex);
159 }
160 return NOTIFY_DONE;
161}
162
139/* 163/*
140 * Allocate an element from the rcu_tortures pool. 164 * Allocate an element from the rcu_tortures pool.
141 */ 165 */
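
The protocol is small enough to model standalone: transitions happen under the mutex, and only the first requester's reason sticks, which is why a signal-driven shutdown is never overwritten by the orderly one:

    #include <stdio.h>
    #include <pthread.h>

    #define FULLSTOP_SIGNALED 1     /* bail due to signal */
    #define FULLSTOP_CLEANUP  2     /* orderly shutdown */

    static int fullstop;
    static pthread_mutex_t fullstop_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void request_stop(int reason)    /* first requester wins */
    {
        pthread_mutex_lock(&fullstop_mutex);
        if (!fullstop)
            fullstop = reason;
        pthread_mutex_unlock(&fullstop_mutex);
    }

    int main(void)                  /* link with -lpthread */
    {
        request_stop(FULLSTOP_SIGNALED);    /* e.g. the reboot notifier */
        request_stop(FULLSTOP_CLEANUP);     /* cleanup arrives second */
        printf("fullstop=%d\n", fullstop);  /* 1: signal reason sticks */
        return 0;
    }
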
@@ -199,11 +223,12 @@ rcu_random(struct rcu_random_state *rrsp)
199static void 223static void
200rcu_stutter_wait(void) 224rcu_stutter_wait(void)
201{ 225{
202 while (stutter_pause_test || !rcutorture_runnable) 226 while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) {
203 if (rcutorture_runnable) 227 if (rcutorture_runnable)
204 schedule_timeout_interruptible(1); 228 schedule_timeout_interruptible(1);
205 else 229 else
206 schedule_timeout_interruptible(round_jiffies_relative(HZ)); 230 schedule_timeout_interruptible(round_jiffies_relative(HZ));
231 }
207} 232}
208 233
209/* 234/*
@@ -599,7 +624,7 @@ rcu_torture_writer(void *arg)
599 rcu_stutter_wait(); 624 rcu_stutter_wait();
600 } while (!kthread_should_stop() && !fullstop); 625 } while (!kthread_should_stop() && !fullstop);
601 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 626 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
602 while (!kthread_should_stop()) 627 while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
603 schedule_timeout_uninterruptible(1); 628 schedule_timeout_uninterruptible(1);
604 return 0; 629 return 0;
605} 630}
@@ -624,7 +649,7 @@ rcu_torture_fakewriter(void *arg)
624 } while (!kthread_should_stop() && !fullstop); 649 } while (!kthread_should_stop() && !fullstop);
625 650
626 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 651 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
627 while (!kthread_should_stop()) 652 while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
628 schedule_timeout_uninterruptible(1); 653 schedule_timeout_uninterruptible(1);
629 return 0; 654 return 0;
630} 655}
@@ -734,7 +759,7 @@ rcu_torture_reader(void *arg)
734 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 759 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
735 if (irqreader && cur_ops->irqcapable) 760 if (irqreader && cur_ops->irqcapable)
736 del_timer_sync(&t); 761 del_timer_sync(&t);
737 while (!kthread_should_stop()) 762 while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
738 schedule_timeout_uninterruptible(1); 763 schedule_timeout_uninterruptible(1);
739 return 0; 764 return 0;
740} 765}
@@ -831,7 +856,7 @@ rcu_torture_stats(void *arg)
831 do { 856 do {
832 schedule_timeout_interruptible(stat_interval * HZ); 857 schedule_timeout_interruptible(stat_interval * HZ);
833 rcu_torture_stats_print(); 858 rcu_torture_stats_print();
834 } while (!kthread_should_stop()); 859 } while (!kthread_should_stop() && !fullstop);
835 VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); 860 VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
836 return 0; 861 return 0;
837} 862}
@@ -899,7 +924,7 @@ rcu_torture_shuffle(void *arg)
899 do { 924 do {
900 schedule_timeout_interruptible(shuffle_interval * HZ); 925 schedule_timeout_interruptible(shuffle_interval * HZ);
901 rcu_torture_shuffle_tasks(); 926 rcu_torture_shuffle_tasks();
902 } while (!kthread_should_stop()); 927 } while (!kthread_should_stop() && !fullstop);
903 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); 928 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
904 return 0; 929 return 0;
905} 930}
@@ -914,10 +939,10 @@ rcu_torture_stutter(void *arg)
914 do { 939 do {
915 schedule_timeout_interruptible(stutter * HZ); 940 schedule_timeout_interruptible(stutter * HZ);
916 stutter_pause_test = 1; 941 stutter_pause_test = 1;
917 if (!kthread_should_stop()) 942 if (!kthread_should_stop() && !fullstop)
918 schedule_timeout_interruptible(stutter * HZ); 943 schedule_timeout_interruptible(stutter * HZ);
919 stutter_pause_test = 0; 944 stutter_pause_test = 0;
920 } while (!kthread_should_stop()); 945 } while (!kthread_should_stop() && !fullstop);
921 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); 946 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
922 return 0; 947 return 0;
923} 948}
@@ -934,12 +959,27 @@ rcu_torture_print_module_parms(char *tag)
934 stutter, irqreader); 959 stutter, irqreader);
935} 960}
936 961
962static struct notifier_block rcutorture_nb = {
963 .notifier_call = rcutorture_shutdown_notify,
964};
965
937static void 966static void
938rcu_torture_cleanup(void) 967rcu_torture_cleanup(void)
939{ 968{
940 int i; 969 int i;
941 970
942 fullstop = 1; 971 mutex_lock(&fullstop_mutex);
972 if (!fullstop) {
973 /* If being signaled, let it happen, then exit. */
974 mutex_unlock(&fullstop_mutex);
975 schedule_timeout_interruptible(10 * HZ);
976 if (cur_ops->cb_barrier != NULL)
977 cur_ops->cb_barrier();
978 return;
979 }
980 fullstop = FULLSTOP_CLEANUP;
981 mutex_unlock(&fullstop_mutex);
982 unregister_reboot_notifier(&rcutorture_nb);
943 if (stutter_task) { 983 if (stutter_task) {
944 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); 984 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
945 kthread_stop(stutter_task); 985 kthread_stop(stutter_task);
@@ -1015,6 +1055,8 @@ rcu_torture_init(void)
1015 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1055 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
1016 &srcu_ops, &sched_ops, &sched_ops_sync, }; 1056 &srcu_ops, &sched_ops, &sched_ops_sync, };
1017 1057
1058 mutex_lock(&fullstop_mutex);
1059
1018 /* Process args and tell the world that the torturer is on the job. */ 1060 /* Process args and tell the world that the torturer is on the job. */
1019 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1061 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
1020 cur_ops = torture_ops[i]; 1062 cur_ops = torture_ops[i];
@@ -1024,6 +1066,7 @@ rcu_torture_init(void)
1024 if (i == ARRAY_SIZE(torture_ops)) { 1066 if (i == ARRAY_SIZE(torture_ops)) {
1025 printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", 1067 printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
1026 torture_type); 1068 torture_type);
1069 mutex_unlock(&fullstop_mutex);
1027 return (-EINVAL); 1070 return (-EINVAL);
1028 } 1071 }
1029 if (cur_ops->init) 1072 if (cur_ops->init)
@@ -1146,9 +1189,12 @@ rcu_torture_init(void)
1146 goto unwind; 1189 goto unwind;
1147 } 1190 }
1148 } 1191 }
1192 register_reboot_notifier(&rcutorture_nb);
1193 mutex_unlock(&fullstop_mutex);
1149 return 0; 1194 return 0;
1150 1195
1151unwind: 1196unwind:
1197 mutex_unlock(&fullstop_mutex);
1152 rcu_torture_cleanup(); 1198 rcu_torture_cleanup();
1153 return firsterr; 1199 return firsterr;
1154} 1200}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
new file mode 100644
index 000000000000..a342b032112c
--- /dev/null
+++ b/kernel/rcutree.c
@@ -0,0 +1,1535 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2008
19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 *
24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 *
27 * For detailed explanation of Read-Copy Update mechanism see -
28 * Documentation/RCU
29 */
30#include <linux/types.h>
31#include <linux/kernel.h>
32#include <linux/init.h>
33#include <linux/spinlock.h>
34#include <linux/smp.h>
35#include <linux/rcupdate.h>
36#include <linux/interrupt.h>
37#include <linux/sched.h>
38#include <asm/atomic.h>
39#include <linux/bitops.h>
40#include <linux/module.h>
41#include <linux/completion.h>
42#include <linux/moduleparam.h>
43#include <linux/percpu.h>
44#include <linux/notifier.h>
45#include <linux/cpu.h>
46#include <linux/mutex.h>
47#include <linux/time.h>
48
49#ifdef CONFIG_DEBUG_LOCK_ALLOC
50static struct lock_class_key rcu_lock_key;
51struct lockdep_map rcu_lock_map =
52 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
53EXPORT_SYMBOL_GPL(rcu_lock_map);
54#endif
55
56/* Data structures. */
57
58#define RCU_STATE_INITIALIZER(name) { \
59 .level = { &name.node[0] }, \
60 .levelcnt = { \
61 NUM_RCU_LVL_0, /* root of hierarchy. */ \
62 NUM_RCU_LVL_1, \
63 NUM_RCU_LVL_2, \
64 NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
65 }, \
66 .signaled = RCU_SIGNAL_INIT, \
67 .gpnum = -300, \
68 .completed = -300, \
69 .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
70 .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
71 .n_force_qs = 0, \
72 .n_force_qs_ngp = 0, \
73}
74
75struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state);
76DEFINE_PER_CPU(struct rcu_data, rcu_data);
77
78struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
79DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
80
81#ifdef CONFIG_NO_HZ
82DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
83#endif /* #ifdef CONFIG_NO_HZ */
84
85static int blimit = 10; /* Maximum callbacks per softirq. */
86static int qhimark = 10000; /* If this many pending, ignore blimit. */
87static int qlowmark = 100; /* Once only this many pending, use blimit. */
88
89static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
90
91/*
92 * Return the number of RCU batches processed thus far for debug & stats.
93 */
94long rcu_batches_completed(void)
95{
96 return rcu_state.completed;
97}
98EXPORT_SYMBOL_GPL(rcu_batches_completed);
99
100/*
101 * Return the number of RCU BH batches processed thus far for debug & stats.
102 */
103long rcu_batches_completed_bh(void)
104{
105 return rcu_bh_state.completed;
106}
107EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
108
109/*
110 * Does the CPU have callbacks ready to be invoked?
111 */
112static int
113cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
114{
115 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
116}
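
The one-line test above works because rdp->nxtlist is a single linked
list carved into four segments (done, waiting, next-ready, next) by the
->nxttail pointer array, and an empty segment's tail pointer aliases its
predecessor's. A minimal user-space model of that invariant, with
hypothetical names rather than the kernel's types:

	#include <stddef.h>

	struct cb {
		struct cb *next;
	};

	#define SEG_DONE	0	/* grace period over; ready to invoke */
	#define NSEG		4	/* done, waiting, next-ready, next */

	struct cpu_cbs {
		struct cb *head;	/* one list holds all four segments */
		struct cb **tail[NSEG];	/* tail[i] marks end of segment i */
	};

	static void cbs_init(struct cpu_cbs *c)
	{
		int i;

		c->head = NULL;
		for (i = 0; i < NSEG; i++)
			c->tail[i] = &c->head;	/* all segments empty */
	}

	/* DONE is nonempty iff the list head is not where DONE ends. */
	static int has_done_cbs(struct cpu_cbs *c)
	{
		return &c->head != c->tail[SEG_DONE];
	}
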
117
118/*
119 * Does the current CPU require a not-yet-scheduled grace period?
120 */
121static int
122cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
123{
124 /* ACCESS_ONCE() because we are accessing outside of lock. */
125 return *rdp->nxttail[RCU_DONE_TAIL] &&
126 ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum);
127}
128
129/*
130 * Return the root node of the specified rcu_state structure.
131 */
132static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
133{
134 return &rsp->node[0];
135}
136
137#ifdef CONFIG_SMP
138
139/*
140 * If the specified CPU is offline, tell the caller that it is in
141 * a quiescent state. Otherwise, whack it with a reschedule IPI.
142 * Grace periods can end up waiting on an offline CPU when that
143 * CPU is in the process of coming online -- it will be added to the
144 * rcu_node bitmasks before it actually makes it online. The same thing
145 * can happen while a CPU is in the process of going offline. Because this
146 * race is quite rare, we check for it after detecting that the grace
147 * period has been delayed rather than checking each and every CPU
148 * each and every time we start a new grace period.
149 */
150static int rcu_implicit_offline_qs(struct rcu_data *rdp)
151{
152 /*
153 * If the CPU is offline, it is in a quiescent state. We can
154 * trust its state not to change because interrupts are disabled.
155 */
156 if (cpu_is_offline(rdp->cpu)) {
157 rdp->offline_fqs++;
158 return 1;
159 }
160
161 /* The CPU is online, so send it a reschedule IPI. */
162 if (rdp->cpu != smp_processor_id())
163 smp_send_reschedule(rdp->cpu);
164 else
165 set_need_resched();
166 rdp->resched_ipi++;
167 return 0;
168}
169
170#endif /* #ifdef CONFIG_SMP */
171
172#ifdef CONFIG_NO_HZ
173static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5);
174
175/**
176 * rcu_enter_nohz - inform RCU that current CPU is entering nohz
177 *
178 * Enter nohz mode, in other words, -leave- the mode in which RCU
179 * read-side critical sections can occur. (Though RCU read-side
180 * critical sections can occur in irq handlers in nohz mode, a possibility
181 * handled by rcu_irq_enter() and rcu_irq_exit()).
182 */
183void rcu_enter_nohz(void)
184{
185 unsigned long flags;
186 struct rcu_dynticks *rdtp;
187
188 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
189 local_irq_save(flags);
190 rdtp = &__get_cpu_var(rcu_dynticks);
191 rdtp->dynticks++;
192 rdtp->dynticks_nesting--;
193 WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
194 local_irq_restore(flags);
195}
196
197/**
198 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
199 *
200 * Exit nohz mode, in other words, -enter- the mode in which RCU
201 * read-side critical sections normally occur.
202 */
203void rcu_exit_nohz(void)
204{
205 unsigned long flags;
206 struct rcu_dynticks *rdtp;
207
208 local_irq_save(flags);
209 rdtp = &__get_cpu_var(rcu_dynticks);
210 rdtp->dynticks++;
211 rdtp->dynticks_nesting++;
212 WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
213 local_irq_restore(flags);
214 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
215}
216
217/**
218 * rcu_nmi_enter - inform RCU of entry to NMI context
219 *
220 * If the CPU was idle with dynamic ticks active, and there is no
221 * irq handler running, this updates rdtp->dynticks_nmi to let the
222 * RCU grace-period handling know that the CPU is active.
223 */
224void rcu_nmi_enter(void)
225{
226 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
227
228 if (rdtp->dynticks & 0x1)
229 return;
230 rdtp->dynticks_nmi++;
231 WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs);
232 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
233}
234
235/**
236 * rcu_nmi_exit - inform RCU of exit from NMI context
237 *
238 * If the CPU was idle with dynamic ticks active, and there is no
239 * irq handler running, this updates rdtp->dynticks_nmi to let the
240 * RCU grace-period handling know that the CPU is no longer active.
241 */
242void rcu_nmi_exit(void)
243{
244 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
245
246 if (rdtp->dynticks & 0x1)
247 return;
248 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
249 rdtp->dynticks_nmi++;
250 WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs);
251}
252
253/**
254 * rcu_irq_enter - inform RCU of entry to hard irq context
255 *
256 * If the CPU was idle with dynamic ticks active, this updates the
257 * rdtp->dynticks to let the RCU handling know that the CPU is active.
258 */
259void rcu_irq_enter(void)
260{
261 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
262
263 if (rdtp->dynticks_nesting++)
264 return;
265 rdtp->dynticks++;
266 WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
267 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
268}
269
270/**
271 * rcu_irq_exit - inform RCU of exit from hard irq context
272 *
273 * If the CPU was idle with dynamic ticks active, update the rdtp->dynticks
274 * to let the RCU handling know that the CPU is going back to idle
275 * with no ticks.
276 */
277void rcu_irq_exit(void)
278{
279 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
280
281 if (--rdtp->dynticks_nesting)
282 return;
283 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
284 rdtp->dynticks++;
285 WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
286
287 /* If the interrupt queued a callback, get out of dyntick mode. */
288 if (__get_cpu_var(rcu_data).nxtlist ||
289 __get_cpu_var(rcu_bh_data).nxtlist)
290 set_need_resched();
291}
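
Taken together, rcu_irq_enter() and rcu_irq_exit() advance ->dynticks
only at the outermost interrupt level, with ->dynticks_nesting tracking
the depth. A toy single-CPU model of the pairing (illustrative names,
not the kernel code):

	struct dyn {
		long dynticks;	/* odd: RCU watches this CPU; even: idle */
		long nesting;	/* irq nesting depth while in nohz mode */
	};

	static void model_irq_enter(struct dyn *d)
	{
		if (d->nesting++ == 0)
			d->dynticks++;	/* even -> odd: watched again */
	}

	static void model_irq_exit(struct dyn *d)
	{
		if (--d->nesting == 0)
			d->dynticks++;	/* odd -> even: back to idle */
	}
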
292
293/*
294 * Record the specified "completed" value, which is later used to validate
295 * dynticks counter manipulations. Specify "rsp->completed - 1" to
296 * unconditionally invalidate any future dynticks manipulations (which is
297 * useful at the beginning of a grace period).
298 */
299static void dyntick_record_completed(struct rcu_state *rsp, long comp)
300{
301 rsp->dynticks_completed = comp;
302}
303
304#ifdef CONFIG_SMP
305
306/*
307 * Recall the previously recorded value of the completion for dynticks.
308 */
309static long dyntick_recall_completed(struct rcu_state *rsp)
310{
311 return rsp->dynticks_completed;
312}
313
314/*
315 * Snapshot the specified CPU's dynticks counter so that we can later
316 * credit them with an implicit quiescent state. Return 1 if this CPU
317 * is already in a quiescent state courtesy of dynticks idle mode.
318 */
319static int dyntick_save_progress_counter(struct rcu_data *rdp)
320{
321 int ret;
322 int snap;
323 int snap_nmi;
324
325 snap = rdp->dynticks->dynticks;
326 snap_nmi = rdp->dynticks->dynticks_nmi;
327 smp_mb(); /* Order sampling of snap with end of grace period. */
328 rdp->dynticks_snap = snap;
329 rdp->dynticks_nmi_snap = snap_nmi;
330 ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
331 if (ret)
332 rdp->dynticks_fqs++;
333 return ret;
334}
335
336/*
337 * Return true if the specified CPU has passed through a quiescent
338 * state by virtue of being in or having passed through a dynticks
339 * idle state since the last call to dyntick_save_progress_counter()
340 * for this same CPU.
341 */
342static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
343{
344 long curr;
345 long curr_nmi;
346 long snap;
347 long snap_nmi;
348
349 curr = rdp->dynticks->dynticks;
350 snap = rdp->dynticks_snap;
351 curr_nmi = rdp->dynticks->dynticks_nmi;
352 snap_nmi = rdp->dynticks_nmi_snap;
353 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
354
355 /*
356 * If the CPU passed through or entered a dynticks idle phase with
357 * no active irq/NMI handlers, then we can safely pretend that the CPU
358 * already acknowledged the request to pass through a quiescent
359 * state. Either way, that CPU cannot possibly be in an RCU
360 * read-side critical section that started before the beginning
361 * of the current RCU grace period.
362 */
363 if ((curr != snap || (curr & 0x1) == 0) &&
364 (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
365 rdp->dynticks_fqs++;
366 return 1;
367 }
368
369 /* Go check for the CPU being offline. */
370 return rcu_implicit_offline_qs(rdp);
371}
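
The even/odd convention above deserves spelling out: ->dynticks is
incremented at every nohz transition, so it is odd while the CPU is
active and even while the CPU is dynticks-idle. A plain-C sketch of the
snapshot-and-compare test under that assumption, ignoring the parallel
NMI counter:

	/* Even counter value means the CPU is in dynticks-idle mode. */
	static int in_dynticks_idle(long dynticks)
	{
		return (dynticks & 0x1) == 0;
	}

	/*
	 * Quiescent if the CPU is idle at the current sample, or if the
	 * counter has moved since the snapshot; movement implies at
	 * least one idle/active transition happened in between.
	 */
	static int counter_says_quiescent(long snap, long curr)
	{
		return curr != snap || in_dynticks_idle(curr);
	}
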
372
373#endif /* #ifdef CONFIG_SMP */
374
375#else /* #ifdef CONFIG_NO_HZ */
376
377static void dyntick_record_completed(struct rcu_state *rsp, long comp)
378{
379}
380
381#ifdef CONFIG_SMP
382
383/*
384 * If there are no dynticks, then the only way that a CPU can passively
385 * be in a quiescent state is to be offline. Unlike dynticks idle, which
386 * is a point in time during the prior (already finished) grace period,
387 * an offline CPU is always in a quiescent state, and thus can be
388 * unconditionally applied. So just return the current value of completed.
389 */
390static long dyntick_recall_completed(struct rcu_state *rsp)
391{
392 return rsp->completed;
393}
394
395static int dyntick_save_progress_counter(struct rcu_data *rdp)
396{
397 return 0;
398}
399
400static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
401{
402 return rcu_implicit_offline_qs(rdp);
403}
404
405#endif /* #ifdef CONFIG_SMP */
406
407#endif /* #else #ifdef CONFIG_NO_HZ */
408
409#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
410
411static void record_gp_stall_check_time(struct rcu_state *rsp)
412{
413 rsp->gp_start = jiffies;
414 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
415}
416
417static void print_other_cpu_stall(struct rcu_state *rsp)
418{
419 int cpu;
420 long delta;
421 unsigned long flags;
422 struct rcu_node *rnp = rcu_get_root(rsp);
423 struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
424 struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
425
426 /* Only let one CPU complain about others per time interval. */
427
428 spin_lock_irqsave(&rnp->lock, flags);
429 delta = jiffies - rsp->jiffies_stall;
430 if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) {
431 spin_unlock_irqrestore(&rnp->lock, flags);
432 return;
433 }
434 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
435 spin_unlock_irqrestore(&rnp->lock, flags);
436
437 /* OK, time to rat on our buddy... */
438
439 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
440 for (; rnp_cur < rnp_end; rnp_cur++) {
441 if (rnp_cur->qsmask == 0)
442 continue;
443 for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
444 if (rnp_cur->qsmask & (1UL << cpu))
445 printk(" %d", rnp_cur->grplo + cpu);
446 }
447 printk(" (detected by %d, t=%ld jiffies)\n",
448 smp_processor_id(), (long)(jiffies - rsp->gp_start));
449 force_quiescent_state(rsp, 0); /* Kick them all. */
450}
451
452static void print_cpu_stall(struct rcu_state *rsp)
453{
454 unsigned long flags;
455 struct rcu_node *rnp = rcu_get_root(rsp);
456
457 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
458 smp_processor_id(), jiffies - rsp->gp_start);
459 dump_stack();
460 spin_lock_irqsave(&rnp->lock, flags);
461 if ((long)(jiffies - rsp->jiffies_stall) >= 0)
462 rsp->jiffies_stall =
463 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
464 spin_unlock_irqrestore(&rnp->lock, flags);
465 set_need_resched(); /* kick ourselves to get things going. */
466}
467
468static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
469{
470 long delta;
471 struct rcu_node *rnp;
472
473 delta = jiffies - rsp->jiffies_stall;
474 rnp = rdp->mynode;
475 if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
476
477 /* We haven't checked in, so go dump stack. */
478 print_cpu_stall(rsp);
479
480 } else if (rsp->gpnum != rsp->completed &&
481 delta >= RCU_STALL_RAT_DELAY) {
482
483 /* They had two time units to dump stack, so complain. */
484 print_other_cpu_stall(rsp);
485 }
486}
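
Note the deliberate ordering above: a stalled CPU gets the full
RCU_SECONDS_TILL_STALL_CHECK interval to report on itself, and only
after a further RCU_STALL_RAT_DELAY do the other CPUs pile on. A
compressed model of the decision (the constant value is hypothetical):

	#define RAT_DELAY	2	/* cf. RCU_STALL_RAT_DELAY, jiffies */

	/* delta = jiffies - jiffies_stall; negative: no timeout yet. */
	static int stall_action(long delta, int self_stalled, int gp_active)
	{
		if (self_stalled && delta >= 0)
			return 1;	/* dump our own stack first */
		if (gp_active && delta >= RAT_DELAY)
			return 2;	/* then rat on the other CPUs */
		return 0;		/* nothing has timed out */
	}
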
487
488#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
489
490static void record_gp_stall_check_time(struct rcu_state *rsp)
491{
492}
493
494static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
495{
496}
497
498#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
499
500/*
501 * Update CPU-local rcu_data state to record the newly noticed grace period.
502 * This is used both when we started the grace period and when we notice
503 * that someone else started the grace period.
504 */
505static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
506{
507 rdp->qs_pending = 1;
508 rdp->passed_quiesc = 0;
509 rdp->gpnum = rsp->gpnum;
510 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
511 RCU_JIFFIES_TILL_FORCE_QS;
512}
513
514/*
515 * Did someone else start a new RCU grace period since we last
516 * checked? Update local state appropriately if so. Must be called
517 * on the CPU corresponding to rdp.
518 */
519static int
520check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
521{
522 unsigned long flags;
523 int ret = 0;
524
525 local_irq_save(flags);
526 if (rdp->gpnum != rsp->gpnum) {
527 note_new_gpnum(rsp, rdp);
528 ret = 1;
529 }
530 local_irq_restore(flags);
531 return ret;
532}
533
534/*
535 * Start a new RCU grace period if warranted, re-initializing the hierarchy
536 * in preparation for detecting the next grace period. The caller must hold
537 * the root node's ->lock, which is released before return. Hard irqs must
538 * be disabled.
539 */
540static void
541rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
542 __releases(rcu_get_root(rsp)->lock)
543{
544 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
545 struct rcu_node *rnp = rcu_get_root(rsp);
546 struct rcu_node *rnp_cur;
547 struct rcu_node *rnp_end;
548
549 if (!cpu_needs_another_gp(rsp, rdp)) {
550 spin_unlock_irqrestore(&rnp->lock, flags);
551 return;
552 }
553
554 /* Advance to a new grace period and initialize state. */
555 rsp->gpnum++;
556 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
557 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
558 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
559 RCU_JIFFIES_TILL_FORCE_QS;
560 record_gp_stall_check_time(rsp);
561 dyntick_record_completed(rsp, rsp->completed - 1);
562 note_new_gpnum(rsp, rdp);
563
564 /*
565 * Because we are first, we know that all our callbacks will
566 * be covered by this upcoming grace period, even the ones
567 * that were registered arbitrarily recently.
568 */
569 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
570 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
571
572 /* Special-case the common single-level case. */
573 if (NUM_RCU_NODES == 1) {
574 rnp->qsmask = rnp->qsmaskinit;
575 spin_unlock_irqrestore(&rnp->lock, flags);
576 return;
577 }
578
579 spin_unlock(&rnp->lock); /* leave irqs disabled. */
580
581
582 /* Exclude any concurrent CPU-hotplug operations. */
583 spin_lock(&rsp->onofflock); /* irqs already disabled. */
584
585 /*
586 * Set the quiescent-state-needed bits in all the non-leaf RCU
587 * nodes for all currently online CPUs. This operation relies
588 * on the layout of the hierarchy within the rsp->node[] array.
589 * Note that other CPUs will access only the leaves of the
590 * hierarchy, which still indicate that no grace period is in
591 * progress. In addition, we have excluded CPU-hotplug operations.
592 *
593 * We therefore do not need to hold any locks. Any required
594 * memory barriers will be supplied by the locks guarding the
595 * leaf rcu_nodes in the hierarchy.
596 */
597
598 rnp_end = rsp->level[NUM_RCU_LVLS - 1];
599 for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++)
600 rnp_cur->qsmask = rnp_cur->qsmaskinit;
601
602 /*
603 * Now set up the leaf nodes. Here we must be careful. First,
604 * we need to hold the lock in order to exclude other CPUs, which
605 * might be contending for the leaf nodes' locks. Second, as
606 * soon as we initialize a given leaf node, its CPUs might run
607 * up the rest of the hierarchy. We must therefore acquire locks
608 * for each node that we touch during this stage. (But we still
609 * are excluding CPU-hotplug operations.)
610 *
611 * Note that the grace period cannot complete until we finish
612 * the initialization process, as there will be at least one
613 * qsmask bit set in the root node until that time, namely the
614 * one corresponding to this CPU.
615 */
616 rnp_end = &rsp->node[NUM_RCU_NODES];
617 rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
618 for (; rnp_cur < rnp_end; rnp_cur++) {
619 spin_lock(&rnp_cur->lock); /* irqs already disabled. */
620 rnp_cur->qsmask = rnp_cur->qsmaskinit;
621 spin_unlock(&rnp_cur->lock); /* irqs already disabled. */
622 }
623
624 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
625 spin_unlock_irqrestore(&rsp->onofflock, flags);
626}
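
The two initialization loops above rely on rsp->node[] being laid out
breadth-first: every interior node precedes every leaf, so the interior
can be set lock-free before the leaves are published one at a time under
their own locks. A stripped-down model of that split, assuming a
hypothetical one-root, sixteen-leaf tree:

	#define N_INTERIOR	1	/* nodes above the leaf level */
	#define N_NODES		17	/* [0] root, [1..16] leaves */

	static void model_start_gp(unsigned long *qsmask,
				   const unsigned long *qsmaskinit)
	{
		int i;

		for (i = 0; i < N_INTERIOR; i++)
			qsmask[i] = qsmaskinit[i];	/* no locks needed */
		for (; i < N_NODES; i++)
			qsmask[i] = qsmaskinit[i];	/* kernel: rnp->lock */
	}
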
627
628/*
629 * Advance this CPU's callbacks, but only if the current grace period
630 * has ended. This may be called only from the CPU to whom the rdp
631 * belongs.
632 */
633static void
634rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
635{
636 long completed_snap;
637 unsigned long flags;
638
639 local_irq_save(flags);
640 completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */
641
642 /* Did another grace period end? */
643 if (rdp->completed != completed_snap) {
644
645 /* Advance callbacks. No harm if list empty. */
646 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
647 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
648 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
649
650 /* Remember that we saw this grace-period completion. */
651 rdp->completed = completed_snap;
652 }
653 local_irq_restore(flags);
654}
655
656/*
657 * Similar to cpu_quiet(), for which it is a helper function. Allows
658 * a group of CPUs to be quieted at one go, though all the CPUs in the
659 * group must be represented by the same leaf rcu_node structure.
660 * That structure's lock must be held upon entry, and it is released
661 * before return.
662 */
663static void
664cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
665 unsigned long flags)
666 __releases(rnp->lock)
667{
668 /* Walk up the rcu_node hierarchy. */
669 for (;;) {
670 if (!(rnp->qsmask & mask)) {
671
672 /* Our bit has already been cleared, so done. */
673 spin_unlock_irqrestore(&rnp->lock, flags);
674 return;
675 }
676 rnp->qsmask &= ~mask;
677 if (rnp->qsmask != 0) {
678
679 /* Other bits still set at this level, so done. */
680 spin_unlock_irqrestore(&rnp->lock, flags);
681 return;
682 }
683 mask = rnp->grpmask;
684 if (rnp->parent == NULL) {
685
686 /* No more levels. Exit loop holding root lock. */
687
688 break;
689 }
690 spin_unlock_irqrestore(&rnp->lock, flags);
691 rnp = rnp->parent;
692 spin_lock_irqsave(&rnp->lock, flags);
693 }
694
695 /*
696 * Get here if we are the last CPU to pass through a quiescent
697 * state for this grace period. Clean up and let rcu_start_gp()
698 * start up the next grace period if one is needed. Note that
699 * we still hold rnp->lock, as required by rcu_start_gp(), which
700 * will release it.
701 */
702 rsp->completed = rsp->gpnum;
703 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
704 rcu_start_gp(rsp, flags); /* releases rnp->lock. */
705}
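
This walk is the heart of the hierarchy: a bit is cleared at the leaf,
and each level whose mask drains to zero clears its own bit in its
parent. A single-threaded toy model (hypothetical types, no locking,
none of the early-exit bookkeeping):

	struct node {
		unsigned long qsmask;	/* groups still owing a QS */
		unsigned long grpmask;	/* this node's bit in its parent */
		struct node *parent;	/* NULL at the root */
	};

	/* Returns 1 when clearing 'mask' drains the root. */
	static int report_qs_up(struct node *n, unsigned long mask)
	{
		for (; n != NULL; mask = n->grpmask, n = n->parent) {
			n->qsmask &= ~mask;
			if (n->qsmask != 0)
				return 0;	/* siblings still pending */
		}
		return 1;	/* last CPU in: grace period can end */
	}
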
706
707/*
708 * Record a quiescent state for the specified CPU, which must either be
709 * the current CPU or an offline CPU. The lastcomp argument is used to
710 * make sure we are still in the grace period of interest. We don't want
711 * to end the current grace period based on quiescent states detected in
712 * an earlier grace period!
713 */
714static void
715cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
716{
717 unsigned long flags;
718 unsigned long mask;
719 struct rcu_node *rnp;
720
721 rnp = rdp->mynode;
722 spin_lock_irqsave(&rnp->lock, flags);
723 if (lastcomp != ACCESS_ONCE(rsp->completed)) {
724
725 /*
726 * Someone beat us to it for this grace period, so leave.
727 * The race with GP start is resolved by the fact that we
728 * hold the leaf rcu_node lock, so that the per-CPU bits
729 * cannot yet be initialized -- so we would simply find our
730 * CPU's bit already cleared in cpu_quiet_msk() if this race
731 * occurred.
732 */
733 rdp->passed_quiesc = 0; /* try again later! */
734 spin_unlock_irqrestore(&rnp->lock, flags);
735 return;
736 }
737 mask = rdp->grpmask;
738 if ((rnp->qsmask & mask) == 0) {
739 spin_unlock_irqrestore(&rnp->lock, flags);
740 } else {
741 rdp->qs_pending = 0;
742
743 /*
744 * This GP can't end until this CPU checks in, so all of our
745 * callbacks can be processed during the next GP.
746 */
747 rdp = rsp->rda[smp_processor_id()];
748 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
749
750 cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
751 }
752}
753
754/*
755 * Check to see if there is a new grace period of which this CPU
756 * is not yet aware, and if so, set up local rcu_data state for it.
757 * Otherwise, see if this CPU has just passed through its first
758 * quiescent state for this grace period, and record that fact if so.
759 */
760static void
761rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
762{
763 /* If there is now a new grace period, record and return. */
764 if (check_for_new_grace_period(rsp, rdp))
765 return;
766
767 /*
768 * Does this CPU still need to do its part for current grace period?
769 * If no, return and let the other CPUs do their part as well.
770 */
771 if (!rdp->qs_pending)
772 return;
773
774 /*
775 * Was there a quiescent state since the beginning of the grace
776 * period? If no, then exit and wait for the next call.
777 */
778 if (!rdp->passed_quiesc)
779 return;
780
781 /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */
782 cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
783}
784
785#ifdef CONFIG_HOTPLUG_CPU
786
787/*
788 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
789 * and move all callbacks from the outgoing CPU to the current one.
790 */
791static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
792{
793 int i;
794 unsigned long flags;
795 long lastcomp;
796 unsigned long mask;
797 struct rcu_data *rdp = rsp->rda[cpu];
798 struct rcu_data *rdp_me;
799 struct rcu_node *rnp;
800
801 /* Exclude any attempts to start a new grace period. */
802 spin_lock_irqsave(&rsp->onofflock, flags);
803
804 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
805 rnp = rdp->mynode;
806 mask = rdp->grpmask; /* rnp->grplo is constant. */
807 do {
808 spin_lock(&rnp->lock); /* irqs already disabled. */
809 rnp->qsmaskinit &= ~mask;
810 if (rnp->qsmaskinit != 0) {
811 spin_unlock(&rnp->lock); /* irqs already disabled. */
812 break;
813 }
814 mask = rnp->grpmask;
815 spin_unlock(&rnp->lock); /* irqs already disabled. */
816 rnp = rnp->parent;
817 } while (rnp != NULL);
818 lastcomp = rsp->completed;
819
820 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
821
822 /* Being offline is a quiescent state, so go record it. */
823 cpu_quiet(cpu, rsp, rdp, lastcomp);
824
825 /*
826 * Move callbacks from the outgoing CPU to the running CPU.
827 * Note that the outgoing CPU is now quiescent, so it is now
828 * (uncharacteristically) safe to access its rcu_data structure.
829 * Note also that we must carefully retain the order of the
830 * outgoing CPU's callbacks in order for rcu_barrier() to work
831 * correctly. Finally, note that we start all the callbacks
832 * afresh, even those that have passed through a grace period
833 * and are therefore ready to invoke. The theory is that hotplug
834 * events are rare, and that if they are frequent enough to
835 * indefinitely delay callbacks, you have far worse things to
836 * be worrying about.
837 */
838 rdp_me = rsp->rda[smp_processor_id()];
839 if (rdp->nxtlist != NULL) {
840 *rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
841 rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
842 rdp->nxtlist = NULL;
843 for (i = 0; i < RCU_NEXT_SIZE; i++)
844 rdp->nxttail[i] = &rdp->nxtlist;
845 rdp_me->qlen += rdp->qlen;
846 rdp->qlen = 0;
847 }
848 local_irq_restore(flags);
849}
850
851/*
852 * Remove the specified CPU from the RCU hierarchy and move any pending
853 * callbacks that it might have to the current CPU. This code assumes
854 * that at least one CPU in the system will remain running at all times.
855 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
856 */
857static void rcu_offline_cpu(int cpu)
858{
859 __rcu_offline_cpu(cpu, &rcu_state);
860 __rcu_offline_cpu(cpu, &rcu_bh_state);
861}
862
863#else /* #ifdef CONFIG_HOTPLUG_CPU */
864
865static void rcu_offline_cpu(int cpu)
866{
867}
868
869#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
870
871/*
872 * Invoke any RCU callbacks that have made it to the end of their grace
873 * period. Throttle as specified by rdp->blimit.
874 */
875static void rcu_do_batch(struct rcu_data *rdp)
876{
877 unsigned long flags;
878 struct rcu_head *next, *list, **tail;
879 int count;
880
881 /* If no callbacks are ready, just return. */
882 if (!cpu_has_callbacks_ready_to_invoke(rdp))
883 return;
884
885 /*
886 * Extract the list of ready callbacks, disabling to prevent
887 * races with call_rcu() from interrupt handlers.
888 */
889 local_irq_save(flags);
890 list = rdp->nxtlist;
891 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
892 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
893 tail = rdp->nxttail[RCU_DONE_TAIL];
894 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
895 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
896 rdp->nxttail[count] = &rdp->nxtlist;
897 local_irq_restore(flags);
898
899 /* Invoke callbacks. */
900 count = 0;
901 while (list) {
902 next = list->next;
903 prefetch(next);
904 list->func(list);
905 list = next;
906 if (++count >= rdp->blimit)
907 break;
908 }
909
910 local_irq_save(flags);
911
912 /* Update count, and requeue any remaining callbacks. */
913 rdp->qlen -= count;
914 if (list != NULL) {
915 *tail = rdp->nxtlist;
916 rdp->nxtlist = list;
917 for (count = 0; count < RCU_NEXT_SIZE; count++)
918 if (&rdp->nxtlist == rdp->nxttail[count])
919 rdp->nxttail[count] = tail;
920 else
921 break;
922 }
923
924 /* Reinstate batch limit if we have worked down the excess. */
925 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
926 rdp->blimit = blimit;
927
928 local_irq_restore(flags);
929
930 /* Re-raise the RCU softirq if there are callbacks remaining. */
931 if (cpu_has_callbacks_ready_to_invoke(rdp))
932 raise_softirq(RCU_SOFTIRQ);
933}
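
With the defaults declared near the top of this file (blimit = 10,
qhimark = 10000, qlowmark = 100), a CPU normally retires ten callbacks
per softirq pass; once the queue tops qhimark, __call_rcu() lifts the
limit so the ready list drains in a single pass, and the limit returns
once the queue is back down to qlowmark. A rough model of the pass
count (illustrative only):

	#define BLIMIT		10	/* default rdp->blimit */
	#define QHIMARK		10000	/* above this, limit is lifted */

	/* Roughly how many softirq passes to invoke n ready callbacks. */
	static long passes_to_drain(long n)
	{
		if (n > QHIMARK)
			return 1;	/* blimit == LONG_MAX: one pass */
		return (n + BLIMIT - 1) / BLIMIT;
	}
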
934
935/*
936 * Check to see if this CPU is in a non-context-switch quiescent state
937 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
938 * Also schedule the RCU softirq handler.
939 *
940 * This function must be called with hardirqs disabled. It is normally
941 * invoked from the scheduling-clock interrupt. If rcu_pending returns
942 * false, there is no point in invoking rcu_check_callbacks().
943 */
944void rcu_check_callbacks(int cpu, int user)
945{
946 if (user ||
947 (idle_cpu(cpu) && !in_softirq() &&
948 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
949
950 /*
951 * Get here if this CPU took its interrupt from user
952 * mode or from the idle loop, and if this is not a
953 * nested interrupt. In this case, the CPU is in
954 * a quiescent state, so count it.
955 *
956 * No memory barrier is required here because both
957 * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference
958 * only CPU-local variables that other CPUs neither
959 * access nor modify, at least not while the corresponding
960 * CPU is online.
961 */
962
963 rcu_qsctr_inc(cpu);
964 rcu_bh_qsctr_inc(cpu);
965
966 } else if (!in_softirq()) {
967
968 /*
969 * Get here if this CPU did not take its interrupt from
970 * softirq, in other words, if it is not interrupting
971 * a rcu_bh read-side critical section. This is an _bh
972 * critical section, so count it.
973 */
974
975 rcu_bh_qsctr_inc(cpu);
976 }
977 raise_softirq(RCU_SOFTIRQ);
978}
979
980#ifdef CONFIG_SMP
981
982/*
983 * Scan the leaf rcu_node structures, processing dyntick state for any that
984 * have not yet encountered a quiescent state, using the function specified.
985 * Returns 1 if the current grace period ends while scanning (possibly
986 * because we made it end).
987 */
988static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
989 int (*f)(struct rcu_data *))
990{
991 unsigned long bit;
992 int cpu;
993 unsigned long flags;
994 unsigned long mask;
995 struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
996 struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
997
998 for (; rnp_cur < rnp_end; rnp_cur++) {
999 mask = 0;
1000 spin_lock_irqsave(&rnp_cur->lock, flags);
1001 if (rsp->completed != lastcomp) {
1002 spin_unlock_irqrestore(&rnp_cur->lock, flags);
1003 return 1;
1004 }
1005 if (rnp_cur->qsmask == 0) {
1006 spin_unlock_irqrestore(&rnp_cur->lock, flags);
1007 continue;
1008 }
1009 cpu = rnp_cur->grplo;
1010 bit = 1;
1011 for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) {
1012 if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu]))
1013 mask |= bit;
1014 }
1015 if (mask != 0 && rsp->completed == lastcomp) {
1016
1017 /* cpu_quiet_msk() releases rnp_cur->lock. */
1018 cpu_quiet_msk(mask, rsp, rnp_cur, flags);
1019 continue;
1020 }
1021 spin_unlock_irqrestore(&rnp_cur->lock, flags);
1022 }
1023 return 0;
1024}
1025
1026/*
1027 * Force quiescent states on reluctant CPUs, and also detect which
1028 * CPUs are in dyntick-idle mode.
1029 */
1030static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1031{
1032 unsigned long flags;
1033 long lastcomp;
1034 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
1035 struct rcu_node *rnp = rcu_get_root(rsp);
1036 u8 signaled;
1037
1038 if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum))
1039 return; /* No grace period in progress, nothing to force. */
1040 if (!spin_trylock_irqsave(&rsp->fqslock, flags)) {
1041 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1042 return; /* Someone else is already on the job. */
1043 }
1044 if (relaxed &&
1045 (long)(rsp->jiffies_force_qs - jiffies) >= 0 &&
1046 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
1047 goto unlock_ret; /* no emergency and done recently. */
1048 rsp->n_force_qs++;
1049 spin_lock(&rnp->lock);
1050 lastcomp = rsp->completed;
1051 signaled = rsp->signaled;
1052 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1053 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
1054 RCU_JIFFIES_TILL_FORCE_QS;
1055 if (lastcomp == rsp->gpnum) {
1056 rsp->n_force_qs_ngp++;
1057 spin_unlock(&rnp->lock);
1058 goto unlock_ret; /* no GP in progress, time updated. */
1059 }
1060 spin_unlock(&rnp->lock);
1061 switch (signaled) {
1062 case RCU_GP_INIT:
1063
1064 break; /* grace period still initializing, ignore. */
1065
1066 case RCU_SAVE_DYNTICK:
1067
1068 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1069 break; /* So gcc recognizes the dead code. */
1070
1071 /* Record dyntick-idle state. */
1072 if (rcu_process_dyntick(rsp, lastcomp,
1073 dyntick_save_progress_counter))
1074 goto unlock_ret;
1075
1076 /* Update state, record completion counter. */
1077 spin_lock(&rnp->lock);
1078 if (lastcomp == rsp->completed) {
1079 rsp->signaled = RCU_FORCE_QS;
1080 dyntick_record_completed(rsp, lastcomp);
1081 }
1082 spin_unlock(&rnp->lock);
1083 break;
1084
1085 case RCU_FORCE_QS:
1086
1087 /* Check dyntick-idle state, send IPI to laggards. */
1088 if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp),
1089 rcu_implicit_dynticks_qs))
1090 goto unlock_ret;
1091
1092 /* Leave state in case more forcing is required. */
1093
1094 break;
1095 }
1096unlock_ret:
1097 spin_unlock_irqrestore(&rsp->fqslock, flags);
1098}
1099
1100#else /* #ifdef CONFIG_SMP */
1101
1102static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1103{
1104 set_need_resched();
1105}
1106
1107#endif /* #else #ifdef CONFIG_SMP */
1108
1109/*
1110 * This does the RCU processing work from softirq context for the
1111 * specified rcu_state and rcu_data structures. This may be called
1112 * only from the CPU to whom the rdp belongs.
1113 */
1114static void
1115__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1116{
1117 unsigned long flags;
1118
1119 /*
1120 * If an RCU GP has gone long enough, go check for dyntick
1121 * idle CPUs and, if needed, send resched IPIs.
1122 */
1123 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1124 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1125 force_quiescent_state(rsp, 1);
1126
1127 /*
1128 * Advance callbacks in response to end of earlier grace
1129 * period that some other CPU ended.
1130 */
1131 rcu_process_gp_end(rsp, rdp);
1132
1133 /* Update RCU state based on any recent quiescent states. */
1134 rcu_check_quiescent_state(rsp, rdp);
1135
1136 /* Does this CPU require a not-yet-started grace period? */
1137 if (cpu_needs_another_gp(rsp, rdp)) {
1138 spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
1139 rcu_start_gp(rsp, flags); /* releases above lock */
1140 }
1141
1142 /* If there are callbacks ready, invoke them. */
1143 rcu_do_batch(rdp);
1144}
1145
1146/*
1147 * Do softirq processing for the current CPU.
1148 */
1149static void rcu_process_callbacks(struct softirq_action *unused)
1150{
1151 /*
1152 * Memory references from any prior RCU read-side critical sections
1153 * executed by the interrupted code must be seen before any RCU
1154 * grace-period manipulations below.
1155 */
1156 smp_mb(); /* See above block comment. */
1157
1158 __rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data));
1159 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1160
1161 /*
1162 * Memory references from any later RCU read-side critical sections
1163 * executed by the interrupted code must be seen after any RCU
1164 * grace-period manipulations above.
1165 */
1166 smp_mb(); /* See above block comment. */
1167}
1168
1169static void
1170__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1171 struct rcu_state *rsp)
1172{
1173 unsigned long flags;
1174 struct rcu_data *rdp;
1175
1176 head->func = func;
1177 head->next = NULL;
1178
1179 smp_mb(); /* Ensure RCU update seen before callback registry. */
1180
1181 /*
1182 * Opportunistically note grace-period endings and beginnings.
1183 * Note that we might see a beginning right after we see an
1184 * end, but never vice versa, since this CPU has to pass through
1185 * a quiescent state betweentimes.
1186 */
1187 local_irq_save(flags);
1188 rdp = rsp->rda[smp_processor_id()];
1189 rcu_process_gp_end(rsp, rdp);
1190 check_for_new_grace_period(rsp, rdp);
1191
1192 /* Add the callback to our list. */
1193 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1194 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1195
1196 /* Start a new grace period if one not already started. */
1197 if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) {
1198 unsigned long nestflag;
1199 struct rcu_node *rnp_root = rcu_get_root(rsp);
1200
1201 spin_lock_irqsave(&rnp_root->lock, nestflag);
1202 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1203 }
1204
1205 /* Force the grace period if too many callbacks or too long waiting. */
1206 if (unlikely(++rdp->qlen > qhimark)) {
1207 rdp->blimit = LONG_MAX;
1208 force_quiescent_state(rsp, 0);
1209 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1210 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1211 force_quiescent_state(rsp, 1);
1212 local_irq_restore(flags);
1213}
1214
1215/*
1216 * Queue an RCU callback for invocation after a grace period.
1217 */
1218void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1219{
1220 __call_rcu(head, func, &rcu_state);
1221}
1222EXPORT_SYMBOL_GPL(call_rcu);
1223
1224/*
1225 * Queue an RCU callback for invocation after a quicker grace period.
1226 */
1227void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1228{
1229 __call_rcu(head, func, &rcu_bh_state);
1230}
1231EXPORT_SYMBOL_GPL(call_rcu_bh);
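
For completeness, the calling convention both exports assume: the caller
embeds a struct rcu_head in the object being protected and reclaims the
object from the callback, once a full grace period guarantees that no
reader still holds a reference. An illustrative user (the foo names are
made up):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct rcu_head rcu;
	};

	/* Runs after a grace period: no reader can still see fp. */
	static void foo_reclaim(struct rcu_head *rcu)
	{
		struct foo *fp = container_of(rcu, struct foo, rcu);

		kfree(fp);
	}

	static void foo_retire(struct foo *fp)
	{
		/* Pre-existing readers may still hold fp; defer the free. */
		call_rcu(&fp->rcu, foo_reclaim);
	}
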
1232
1233/*
1234 * Check to see if there is any immediate RCU-related work to be done
1235 * by the current CPU, for the specified type of RCU, returning 1 if so.
1236 * The checks are in order of increasing expense: checks that can be
1237 * carried out against CPU-local state are performed first. However,
1238 * we must check for CPU stalls first, else we might not get a chance.
1239 */
1240static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1241{
1242 rdp->n_rcu_pending++;
1243
1244 /* Check for CPU stalls, if enabled. */
1245 check_cpu_stall(rsp, rdp);
1246
1247 /* Is the RCU core waiting for a quiescent state from this CPU? */
1248 if (rdp->qs_pending)
1249 return 1;
1250
1251 /* Does this CPU have callbacks ready to invoke? */
1252 if (cpu_has_callbacks_ready_to_invoke(rdp))
1253 return 1;
1254
1255 /* Has RCU gone idle with this CPU needing another grace period? */
1256 if (cpu_needs_another_gp(rsp, rdp))
1257 return 1;
1258
1259 /* Has another RCU grace period completed? */
1260 if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
1261 return 1;
1262
1263 /* Has a new RCU grace period started? */
1264 if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
1265 return 1;
1266
1267 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1268 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1269 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1270 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
1271 return 1;
1272
1273 /* nothing to do */
1274 return 0;
1275}
1276
1277/*
1278 * Check to see if there is any immediate RCU-related work to be done
1279 * by the current CPU, returning 1 if so. This function is part of the
1280 * RCU implementation; it is -not- an exported member of the RCU API.
1281 */
1282int rcu_pending(int cpu)
1283{
1284 return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) ||
1285 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
1286}
1287
1288/*
1289 * Check to see if any future RCU-related work will need to be done
1290 * by the current CPU, even if none need be done immediately, returning
1291 * 1 if so. This function is part of the RCU implementation; it is -not-
1292 * an exported member of the RCU API.
1293 */
1294int rcu_needs_cpu(int cpu)
1295{
1296 /* RCU callbacks either ready or pending? */
1297 return per_cpu(rcu_data, cpu).nxtlist ||
1298 per_cpu(rcu_bh_data, cpu).nxtlist;
1299}
1300
1301/*
1302 * Initialize a CPU's per-CPU RCU data. We take this "scorched earth"
1303 * approach so that we don't have to worry about how long the CPU has
1304 * been gone, or whether it ever was online previously. We do trust the
1305 * ->mynode field, as it is constant for a given struct rcu_data and
1306 * initialized during early boot.
1307 *
1308 * Note that only one online or offline event can be happening at a given
1309 * time. Note also that we can accept some slop in the rsp->completed
1310 * access due to the fact that this CPU cannot possibly have any RCU
1311 * callbacks in flight yet.
1312 */
1313static void
1314rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
1315{
1316 unsigned long flags;
1317 int i;
1318 long lastcomp;
1319 unsigned long mask;
1320 struct rcu_data *rdp = rsp->rda[cpu];
1321 struct rcu_node *rnp = rcu_get_root(rsp);
1322
1323 /* Set up local state, ensuring consistent view of global state. */
1324 spin_lock_irqsave(&rnp->lock, flags);
1325 lastcomp = rsp->completed;
1326 rdp->completed = lastcomp;
1327 rdp->gpnum = lastcomp;
1328 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1329 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1330 rdp->beenonline = 1; /* We have now been online. */
1331 rdp->passed_quiesc_completed = lastcomp - 1;
1332 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
1333 rdp->nxtlist = NULL;
1334 for (i = 0; i < RCU_NEXT_SIZE; i++)
1335 rdp->nxttail[i] = &rdp->nxtlist;
1336 rdp->qlen = 0;
1337 rdp->blimit = blimit;
1338#ifdef CONFIG_NO_HZ
1339 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1340#endif /* #ifdef CONFIG_NO_HZ */
1341 rdp->cpu = cpu;
1342 spin_unlock(&rnp->lock); /* irqs remain disabled. */
1343
1344 /*
1345 * A new grace period might start here. If so, we won't be part
1346 * of it, but that is OK, as we are currently in a quiescent state.
1347 */
1348
1349 /* Exclude any attempts to start a new GP on large systems. */
1350 spin_lock(&rsp->onofflock); /* irqs already disabled. */
1351
1352 /* Add CPU to rcu_node bitmasks. */
1353 rnp = rdp->mynode;
1354 mask = rdp->grpmask;
1355 do {
1356 /* Exclude any attempts to start a new GP on small systems. */
1357 spin_lock(&rnp->lock); /* irqs already disabled. */
1358 rnp->qsmaskinit |= mask;
1359 mask = rnp->grpmask;
1360 spin_unlock(&rnp->lock); /* irqs already disabled. */
1361 rnp = rnp->parent;
1362 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
1363
1364 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1365
1366 /*
1367 * A new grace period might start here. If so, we will be part of
1368 * it, and its gpnum will be greater than ours, so we will
1369 * participate. It is also possible for the gpnum to have been
1370 * incremented before this function was called, and the bitmasks
1371 * to not be filled out until now, in which case we will also
1372 * participate due to our gpnum being behind.
1373 */
1374
1375 /* Since it is coming online, the CPU is in a quiescent state. */
1376 cpu_quiet(cpu, rsp, rdp, lastcomp);
1377 local_irq_restore(flags);
1378}
1379
1380static void __cpuinit rcu_online_cpu(int cpu)
1381{
1382#ifdef CONFIG_NO_HZ
1383 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1384
1385 rdtp->dynticks_nesting = 1;
1386 rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */
1387 rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1;
1388#endif /* #ifdef CONFIG_NO_HZ */
1389 rcu_init_percpu_data(cpu, &rcu_state);
1390 rcu_init_percpu_data(cpu, &rcu_bh_state);
1391 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1392}
1393
1394/*
1395 * Handle CPU online/offline notification events.
1396 */
1397static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1398 unsigned long action, void *hcpu)
1399{
1400 long cpu = (long)hcpu;
1401
1402 switch (action) {
1403 case CPU_UP_PREPARE:
1404 case CPU_UP_PREPARE_FROZEN:
1405 rcu_online_cpu(cpu);
1406 break;
1407 case CPU_DEAD:
1408 case CPU_DEAD_FROZEN:
1409 case CPU_UP_CANCELED:
1410 case CPU_UP_CANCELED_FROZEN:
1411 rcu_offline_cpu(cpu);
1412 break;
1413 default:
1414 break;
1415 }
1416 return NOTIFY_OK;
1417}
1418
1419/*
1420 * Compute the per-level fanout, either using the exact fanout specified
1421 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
1422 */
1423#ifdef CONFIG_RCU_FANOUT_EXACT
1424static void __init rcu_init_levelspread(struct rcu_state *rsp)
1425{
1426 int i;
1427
1428 for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
1429 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
1430}
1431#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
1432static void __init rcu_init_levelspread(struct rcu_state *rsp)
1433{
1434 int ccur;
1435 int cprv;
1436 int i;
1437
1438 cprv = NR_CPUS;
1439 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
1440 ccur = rsp->levelcnt[i];
1441 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
1442 cprv = ccur;
1443 }
1444}
1445#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
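
In the balanced (non-exact) case the spread at each level is a ceiling
divide computed rootward from the leaves. A standalone check of the
arithmetic, with made-up values for levelcnt and the CPU count:

	#include <stdio.h>

	int main(void)
	{
		int levelcnt[] = { 1, 16 };	/* root, then leaf nodes */
		int levelspread[2];
		int cprv = 1000;		/* hypothetical NR_CPUS */
		int i;

		for (i = 1; i >= 0; i--) {
			int ccur = levelcnt[i];

			levelspread[i] = (cprv + ccur - 1) / ccur;
			cprv = ccur;
		}
		printf("%d %d\n", levelspread[0], levelspread[1]);
		return 0;	/* prints "16 63" */
	}
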
1446
1447/*
1448 * Helper function for rcu_init() that initializes one rcu_state structure.
1449 */
1450static void __init rcu_init_one(struct rcu_state *rsp)
1451{
1452 int cpustride = 1;
1453 int i;
1454 int j;
1455 struct rcu_node *rnp;
1456
1457 /* Initialize the level-tracking arrays. */
1458
1459 for (i = 1; i < NUM_RCU_LVLS; i++)
1460 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
1461 rcu_init_levelspread(rsp);
1462
1463 /* Initialize the elements themselves, starting from the leaves. */
1464
1465 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
1466 cpustride *= rsp->levelspread[i];
1467 rnp = rsp->level[i];
1468 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
1469 spin_lock_init(&rnp->lock);
1470 rnp->qsmask = 0;
1471 rnp->qsmaskinit = 0;
1472 rnp->grplo = j * cpustride;
1473 rnp->grphi = (j + 1) * cpustride - 1;
1474 if (rnp->grphi >= NR_CPUS)
1475 rnp->grphi = NR_CPUS - 1;
1476 if (i == 0) {
1477 rnp->grpnum = 0;
1478 rnp->grpmask = 0;
1479 rnp->parent = NULL;
1480 } else {
1481 rnp->grpnum = j % rsp->levelspread[i - 1];
1482 rnp->grpmask = 1UL << rnp->grpnum;
1483 rnp->parent = rsp->level[i - 1] +
1484 j / rsp->levelspread[i - 1];
1485 }
1486 rnp->level = i;
1487 }
1488 }
1489}
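
Continuing the hypothetical numbers from the sketch above (16 leaves,
cpustride 63 at the leaf level, 1000 CPUs), leaf j covers CPUs j*63
through j*63 + 62, with the final leaf clamped:

	#define NCPUS_HYP	1000	/* hypothetical CPU count */
	#define STRIDE		63	/* hypothetical leaf cpustride */

	/* CPU range [*lo, *hi] covered by leaf j. */
	static void leaf_range(int j, int *lo, int *hi)
	{
		*lo = j * STRIDE;
		*hi = (j + 1) * STRIDE - 1;
		if (*hi >= NCPUS_HYP)
			*hi = NCPUS_HYP - 1;	/* last leaf is partial */
	}
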
1490
1491/*
1492 * Helper macro for __rcu_init(). To be used nowhere else!
1493 * Assigns leaf node pointers into each CPU's rcu_data structure.
1494 */
1495#define RCU_DATA_PTR_INIT(rsp, rcu_data) \
1496do { \
1497 rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
1498 j = 0; \
1499 for_each_possible_cpu(i) { \
1500 if (i > rnp[j].grphi) \
1501 j++; \
1502 per_cpu(rcu_data, i).mynode = &rnp[j]; \
1503 (rsp)->rda[i] = &per_cpu(rcu_data, i); \
1504 } \
1505} while (0)
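
The macro's j index only advances, which is safe because
for_each_possible_cpu() visits CPUs in ascending order and the leaves
cover ascending, contiguous CPU ranges. The same walk as a standalone
function (hypothetical leaf table):

	struct leaf {
		int grphi;	/* highest CPU number this leaf covers */
	};

	/* Give each cpu in [0, ncpus) the index of its covering leaf. */
	static void assign_leaves(const struct leaf *leaves, int ncpus,
				  int *mynode)
	{
		int cpu, j = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			if (cpu > leaves[j].grphi)
				j++;	/* ranges ascend contiguously */
			mynode[cpu] = j;
		}
	}
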
1506
1507static struct notifier_block __cpuinitdata rcu_nb = {
1508 .notifier_call = rcu_cpu_notify,
1509};
1510
1511void __init __rcu_init(void)
1512{
1513 int i; /* All used by RCU_DATA_PTR_INIT(). */
1514 int j;
1515 struct rcu_node *rnp;
1516
1517 printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
1518#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1519 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1520#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
1521 rcu_init_one(&rcu_state);
1522 RCU_DATA_PTR_INIT(&rcu_state, rcu_data);
1523 rcu_init_one(&rcu_bh_state);
1524 RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data);
1525
1526 for_each_online_cpu(i)
1527 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
1528 /* Register notifier for non-boot CPUs */
1529 register_cpu_notifier(&rcu_nb);
1530 printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
1531}
1532
1533module_param(blimit, int, 0);
1534module_param(qhimark, int, 0);
1535module_param(qlowmark, int, 0);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
new file mode 100644
index 000000000000..d6db3e837826
--- /dev/null
+++ b/kernel/rcutree_trace.c
@@ -0,0 +1,271 @@
1/*
2 * Read-Copy Update tracing for hierarchical implementation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2008
19 *
20 * Papers: http://www.rdrop.com/users/paulmck/RCU
21 *
22 * For detailed explanation of Read-Copy Update mechanism see -
23 * Documentation/RCU
24 *
25 */
26#include <linux/types.h>
27#include <linux/kernel.h>
28#include <linux/init.h>
29#include <linux/spinlock.h>
30#include <linux/smp.h>
31#include <linux/rcupdate.h>
32#include <linux/interrupt.h>
33#include <linux/sched.h>
34#include <asm/atomic.h>
35#include <linux/bitops.h>
36#include <linux/module.h>
37#include <linux/completion.h>
38#include <linux/moduleparam.h>
39#include <linux/percpu.h>
40#include <linux/notifier.h>
41#include <linux/cpu.h>
42#include <linux/mutex.h>
43#include <linux/debugfs.h>
44#include <linux/seq_file.h>
45
46static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
47{
48 if (!rdp->beenonline)
49 return;
50 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x",
51 rdp->cpu,
52 cpu_is_offline(rdp->cpu) ? '!' : ' ',
53 rdp->completed, rdp->gpnum,
54 rdp->passed_quiesc, rdp->passed_quiesc_completed,
55 rdp->qs_pending,
56 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
57 (int)(rdp->n_rcu_pending & 0xffff));
58#ifdef CONFIG_NO_HZ
59 seq_printf(m, " dt=%d/%d dn=%d df=%lu",
60 rdp->dynticks->dynticks,
61 rdp->dynticks->dynticks_nesting,
62 rdp->dynticks->dynticks_nmi,
63 rdp->dynticks_fqs);
64#endif /* #ifdef CONFIG_NO_HZ */
65 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
66 seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit);
67}
68
69#define PRINT_RCU_DATA(name, func, m) \
70 do { \
71 int _p_r_d_i; \
72 \
73 for_each_possible_cpu(_p_r_d_i) \
74 func(m, &per_cpu(name, _p_r_d_i)); \
75 } while (0)
76
77static int show_rcudata(struct seq_file *m, void *unused)
78{
79 seq_puts(m, "rcu:\n");
80 PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m);
81 seq_puts(m, "rcu_bh:\n");
82 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
83 return 0;
84}
85
86static int rcudata_open(struct inode *inode, struct file *file)
87{
88 return single_open(file, show_rcudata, NULL);
89}
90
91static struct file_operations rcudata_fops = {
92 .owner = THIS_MODULE,
93 .open = rcudata_open,
94 .read = seq_read,
95 .llseek = seq_lseek,
96 .release = single_release,
97};
98
99static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
100{
101 if (!rdp->beenonline)
102 return;
103 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld",
104 rdp->cpu,
105 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
106 rdp->completed, rdp->gpnum,
107 rdp->passed_quiesc, rdp->passed_quiesc_completed,
108 rdp->qs_pending,
109 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
110 rdp->n_rcu_pending);
111#ifdef CONFIG_NO_HZ
112 seq_printf(m, ",%d,%d,%d,%lu",
113 rdp->dynticks->dynticks,
114 rdp->dynticks->dynticks_nesting,
115 rdp->dynticks->dynticks_nmi,
116 rdp->dynticks_fqs);
117#endif /* #ifdef CONFIG_NO_HZ */
118 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
119 seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit);
120}
121
122static int show_rcudata_csv(struct seq_file *m, void *unused)
123{
124 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"qp\",\"rpfq\",\"rp\",");
125#ifdef CONFIG_NO_HZ
126 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
127#endif /* #ifdef CONFIG_NO_HZ */
128 seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
129 seq_puts(m, "\"rcu:\"\n");
130 PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m);
131 seq_puts(m, "\"rcu_bh:\"\n");
132 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
133 return 0;
134}
135
136static int rcudata_csv_open(struct inode *inode, struct file *file)
137{
138 return single_open(file, show_rcudata_csv, NULL);
139}
140
141static struct file_operations rcudata_csv_fops = {
142 .owner = THIS_MODULE,
143 .open = rcudata_csv_open,
144 .read = seq_read,
145 .llseek = seq_lseek,
146 .release = single_release,
147};
148
149static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
150{
151 int level = 0;
152 struct rcu_node *rnp;
153
154 seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
155 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
156 rsp->completed, rsp->gpnum, rsp->signaled,
157 (long)(rsp->jiffies_force_qs - jiffies),
158 (int)(jiffies & 0xffff),
159 rsp->n_force_qs, rsp->n_force_qs_ngp,
160 rsp->n_force_qs - rsp->n_force_qs_ngp,
161 rsp->n_force_qs_lh);
162 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
163 if (rnp->level != level) {
164 seq_puts(m, "\n");
165 level = rnp->level;
166 }
167 seq_printf(m, "%lx/%lx %d:%d ^%d ",
168 rnp->qsmask, rnp->qsmaskinit,
169 rnp->grplo, rnp->grphi, rnp->grpnum);
170 }
171 seq_puts(m, "\n");
172}
173
174static int show_rcuhier(struct seq_file *m, void *unused)
175{
176 seq_puts(m, "rcu:\n");
177 print_one_rcu_state(m, &rcu_state);
178 seq_puts(m, "rcu_bh:\n");
179 print_one_rcu_state(m, &rcu_bh_state);
180 return 0;
181}
182
183static int rcuhier_open(struct inode *inode, struct file *file)
184{
185 return single_open(file, show_rcuhier, NULL);
186}
187
188static struct file_operations rcuhier_fops = {
189 .owner = THIS_MODULE,
190 .open = rcuhier_open,
191 .read = seq_read,
192 .llseek = seq_lseek,
193 .release = single_release,
194};
195
196static int show_rcugp(struct seq_file *m, void *unused)
197{
198 seq_printf(m, "rcu: completed=%ld gpnum=%ld\n",
199 rcu_state.completed, rcu_state.gpnum);
200 seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
201 rcu_bh_state.completed, rcu_bh_state.gpnum);
202 return 0;
203}
204
205static int rcugp_open(struct inode *inode, struct file *file)
206{
207 return single_open(file, show_rcugp, NULL);
208}
209
210static struct file_operations rcugp_fops = {
211 .owner = THIS_MODULE,
212 .open = rcugp_open,
213 .read = seq_read,
214 .llseek = seq_lseek,
215 .release = single_release,
216};
217
218static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
219static int __init rcuclassic_trace_init(void)
220{
221 rcudir = debugfs_create_dir("rcu", NULL);
222 if (!rcudir)
223 goto out;
224
225 datadir = debugfs_create_file("rcudata", 0444, rcudir,
226 NULL, &rcudata_fops);
227 if (!datadir)
228 goto free_out;
229
230 datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir,
231 NULL, &rcudata_csv_fops);
232 if (!datadir_csv)
233 goto free_out;
234
235 gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
236 if (!gpdir)
237 goto free_out;
238
239 hierdir = debugfs_create_file("rcuhier", 0444, rcudir,
240 NULL, &rcuhier_fops);
241 if (!hierdir)
242 goto free_out;
243 return 0;
244free_out:
245 if (datadir)
246 debugfs_remove(datadir);
247 if (datadir_csv)
248 debugfs_remove(datadir_csv);
249 if (gpdir)
250 debugfs_remove(gpdir);
251 debugfs_remove(rcudir);
252out:
253 return 1;
254}
255
256static void __exit rcuclassic_trace_cleanup(void)
257{
258 debugfs_remove(datadir);
259 debugfs_remove(datadir_csv);
260 debugfs_remove(gpdir);
261 debugfs_remove(hierdir);
262 debugfs_remove(rcudir);
263}
264
265
266module_init(rcuclassic_trace_init);
267module_exit(rcuclassic_trace_cleanup);
268
269MODULE_AUTHOR("Paul E. McKenney");
270MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
271MODULE_LICENSE("GPL");
diff --git a/kernel/resource.c b/kernel/resource.c
index 4337063663ef..e633106b12f6 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -853,6 +853,15 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 		if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
 		    PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
 			continue;
+		/*
+		 * if a resource is "BUSY", it's not a hardware resource
+		 * but a driver mapping of such a resource; we don't want
+		 * to warn for those; some drivers legitimately map only
+		 * partial hardware resources. (example: vesafb)
+		 */
+		if (p->flags & IORESOURCE_BUSY)
+			continue;
+
 		printk(KERN_WARNING "resource map sanity check conflict: "
 		       "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
 		       (unsigned long long)addr,
diff --git a/kernel/sched.c b/kernel/sched.c
index e4bb1dd7b308..3e70963120a0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4203,7 +4203,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
-		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
@@ -4339,7 +4338,7 @@ void __kprobes sub_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
 		return;
 	/*
 	 * Is the spinlock portion underflowing?
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..466e75ce271a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -102,20 +102,6 @@ void local_bh_disable(void)
 
 EXPORT_SYMBOL(local_bh_disable);
 
-void __local_bh_enable(void)
-{
-	WARN_ON_ONCE(in_irq());
-
-	/*
-	 * softirqs should never be enabled by __local_bh_enable(),
-	 * it always nests inside local_bh_enable() sections:
-	 */
-	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
-
-	sub_preempt_count(SOFTIRQ_OFFSET);
-}
-EXPORT_SYMBOL_GPL(__local_bh_enable);
-
 /*
  * Special-case - softirqs can safely be enabled in
  * cond_resched_softirq(), or by __do_softirq(),
@@ -269,6 +255,7 @@ void irq_enter(void)
 {
 	int cpu = smp_processor_id();
 
+	rcu_irq_enter();
 	if (idle_cpu(cpu) && !in_interrupt()) {
 		__irq_enter();
 		tick_check_idle(cpu);
@@ -295,9 +282,9 @@ void irq_exit(void)
 
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
-	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-		tick_nohz_stop_sched_tick(0);
 	rcu_irq_exit();
+	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
+		tick_nohz_stop_sched_tick(0);
 #endif
 	preempt_enable_no_resched();
 }
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index dc0b3be6b7d5..1ab790c67b17 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
 /*
  * Zero means infinite timeout - no checking done:
  */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
 
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
 
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 94b527ef1d1e..eb212f8f8bc8 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
 #include <linux/sched.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
@@ -24,3 +25,13 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
 }
 EXPORT_SYMBOL_GPL(print_stack_trace);
 
+/*
+ * Architectures that do not implement save_stack_trace_tsk get this
+ * weak alias and a once-per-bootup warning (whenever this facility
+ * is utilized - for example by procfs):
+ */
+__weak void
+save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n");
+}
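
The __weak annotation above lets an architecture override the stub simply by linking a strong definition of the same symbol; nothing else has to change. A small userspace sketch of that linker behaviour using GCC's attribute directly (the function name is invented for the demo):

#include <stdio.h>

/* Fallback, used only when no strong definition of demo_backtrace exists. */
__attribute__((weak)) void demo_backtrace(void)
{
	printf("demo_backtrace() not implemented on this arch\n");
}

int main(void)
{
	demo_backtrace();	/* a strong demo_backtrace() elsewhere would win */
	return 0;
}
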
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d16..5fc3a0cfb994 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
 	struct task_cputime cputime;
 	cputime_t cutime, cstime;
 
-	spin_lock_irq(&current->sighand->siglock);
 	thread_group_cputime(current, &cputime);
+	spin_lock_irq(&current->sighand->siglock);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..eae594cb6ea9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -252,6 +252,14 @@ config DEBUG_OBJECTS_TIMERS
 	  timer routines to track the life time of timer objects and
 	  validate the timer operations.
 
+config DEBUG_OBJECTS_ENABLE_DEFAULT
+	int "debug_objects bootup default value (0-1)"
+	range 0 1
+	default "1"
+	depends on DEBUG_OBJECTS
+	help
+	  Debug objects boot parameter default value
+
 config DEBUG_SLAB
 	bool "Debug slab memory allocations"
 	depends on DEBUG_KERNEL && SLAB
@@ -545,6 +553,16 @@ config DEBUG_SG
 
 	  If unsure, say N.
 
+config DEBUG_NOTIFIERS
+	bool "Debug notifier call chains"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on sanity checking for notifier call chains.
+	  This is most useful for kernel developers to make sure that
+	  modules properly unregister themselves from notifier chains.
+	  This is a relatively cheap check but if you care about maximum
+	  performance, say N.
+
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
 	depends on DEBUG_KERNEL && \
@@ -619,6 +637,19 @@ config RCU_CPU_STALL_DETECTOR
 
 	  Say N if you are unsure.
 
+config RCU_CPU_STALL_DETECTOR
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU || TREE_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index e3ab374e1334..5d99be1fd988 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -45,7 +45,9 @@ static struct kmem_cache *obj_cache;
 static int debug_objects_maxchain __read_mostly;
 static int debug_objects_fixups __read_mostly;
 static int debug_objects_warnings __read_mostly;
-static int debug_objects_enabled __read_mostly;
+static int debug_objects_enabled __read_mostly
+				= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
+
 static struct debug_obj_descr *descr_test __read_mostly;
 
 static int __init enable_object_debug(char *str)
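
The two hunks above wire a Kconfig integer into the variable's initializer while keeping the existing debug_objects= boot override. A compressed userspace sketch of the pattern; the macro stands in for the generated CONFIG_ value, and the argv parsing stands in for the kernel's __setup() hook:

#include <stdio.h>
#include <stdlib.h>

#ifndef CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT
#define CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT 1	/* build-time default */
#endif

static int debug_objects_enabled = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;

int main(int argc, char **argv)
{
	if (argc > 1)		/* stand-in for the boot parameter */
		debug_objects_enabled = atoi(argv[1]);
	printf("debug_objects_enabled = %d\n", debug_objects_enabled);
	return 0;
}
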
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5f6c629a924d..fa2dc4e5f9ba 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -21,9 +21,12 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <linux/swiotlb.h>
 #include <linux/string.h>
+#include <linux/swiotlb.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
+#include <linux/highmem.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -36,22 +39,6 @@
 #define OFFSET(val,align) ((unsigned long)	\
 	( (val) & ( (align) - 1)))
 
-#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
-#define SG_ENT_PHYS_ADDRESS(sg)	virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
-
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2. What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE	128
-
-/*
- * log of the size of each IO TLB slab. The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 
 /*
@@ -102,7 +89,10 @@ static unsigned int io_tlb_index;
  * We need to save away the original address corresponding to a mapped entry
  * for the sync operations.
  */
-static unsigned char **io_tlb_orig_addr;
+static struct swiotlb_phys_addr {
+	struct page *page;
+	unsigned int offset;
+} *io_tlb_orig_addr;
 
 /*
  * Protect the above data structures in the map and unmap calls
@@ -126,6 +116,72 @@ setup_io_tlb_npages(char *str)
126__setup("swiotlb=", setup_io_tlb_npages); 116__setup("swiotlb=", setup_io_tlb_npages);
127/* make io_tlb_overflow tunable too? */ 117/* make io_tlb_overflow tunable too? */
128 118
119void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
120{
121 return alloc_bootmem_low_pages(size);
122}
123
124void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
125{
126 return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
127}
128
129dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
130{
131 return paddr;
132}
133
134phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
135{
136 return baddr;
137}
138
139static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
140{
141 return swiotlb_phys_to_bus(virt_to_phys(address));
142}
143
144static void *swiotlb_bus_to_virt(dma_addr_t address)
145{
146 return phys_to_virt(swiotlb_bus_to_phys(address));
147}
148
149int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
150{
151 return 0;
152}
153
154static dma_addr_t swiotlb_sg_to_bus(struct scatterlist *sg)
155{
156 return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset);
157}
158
159static void swiotlb_print_info(unsigned long bytes)
160{
161 phys_addr_t pstart, pend;
162 dma_addr_t bstart, bend;
163
164 pstart = virt_to_phys(io_tlb_start);
165 pend = virt_to_phys(io_tlb_end);
166
167 bstart = swiotlb_phys_to_bus(pstart);
168 bend = swiotlb_phys_to_bus(pend);
169
170 printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
171 bytes >> 20, io_tlb_start, io_tlb_end);
172 if (pstart != bstart || pend != bend)
173 printk(KERN_INFO "software IO TLB at phys %#llx - %#llx"
174 " bus %#llx - %#llx\n",
175 (unsigned long long)pstart,
176 (unsigned long long)pend,
177 (unsigned long long)bstart,
178 (unsigned long long)bend);
179 else
180 printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
181 (unsigned long long)pstart,
182 (unsigned long long)pend);
183}
184
129/* 185/*
130 * Statically reserve bounce buffer space and initialize bounce buffer data 186 * Statically reserve bounce buffer space and initialize bounce buffer data
131 * structures for the software IO TLB used to implement the DMA API. 187 * structures for the software IO TLB used to implement the DMA API.
@@ -145,7 +201,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages(bytes);
+	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + bytes;
@@ -159,7 +215,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	for (i = 0; i < io_tlb_nslabs; i++)
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
 
 	/*
 	 * Get the overflow emergency buffer
@@ -168,8 +224,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
 
-	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	swiotlb_print_info(bytes);
 }
 
 void __init
@@ -202,8 +257,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-							order);
+		io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
 		if (io_tlb_start)
 			break;
 		order--;
@@ -235,12 +289,12 @@ swiotlb_late_init_with_default_size(size_t default_size)
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;
 
-	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
-	                           get_order(io_tlb_nslabs * sizeof(char *)));
+	io_tlb_orig_addr = (struct swiotlb_phys_addr *)__get_free_pages(GFP_KERNEL,
+	                           get_order(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr)));
 	if (!io_tlb_orig_addr)
 		goto cleanup3;
 
-	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(struct swiotlb_phys_addr));
 
 	/*
 	 * Get the overflow emergency buffer
@@ -250,9 +304,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		goto cleanup4;
 
-	printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
-	       "0x%lx\n", bytes >> 20,
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	swiotlb_print_info(bytes);
 
 	return 0;
 
@@ -279,16 +331,69 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
+static inline int range_needs_mapping(void *ptr, size_t size)
+{
+	return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size);
+}
+
 static int is_swiotlb_buffer(char *addr)
 {
 	return addr >= io_tlb_start && addr < io_tlb_end;
 }
 
+static struct swiotlb_phys_addr swiotlb_bus_to_phys_addr(char *dma_addr)
+{
+	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	struct swiotlb_phys_addr buffer = io_tlb_orig_addr[index];
+	buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
+	buffer.page += buffer.offset >> PAGE_SHIFT;
+	buffer.offset &= PAGE_SIZE - 1;
+	return buffer;
+}
+
+static void
+__sync_single(struct swiotlb_phys_addr buffer, char *dma_addr, size_t size, int dir)
+{
+	if (PageHighMem(buffer.page)) {
+		size_t len, bytes;
+		char *dev, *host, *kmp;
+
+		len = size;
+		while (len != 0) {
+			unsigned long flags;
+
+			bytes = len;
+			if ((bytes + buffer.offset) > PAGE_SIZE)
+				bytes = PAGE_SIZE - buffer.offset;
+			local_irq_save(flags); /* protects KM_BOUNCE_READ */
+			kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
+			dev = dma_addr + size - len;
+			host = kmp + buffer.offset;
+			if (dir == DMA_FROM_DEVICE)
+				memcpy(host, dev, bytes);
+			else
+				memcpy(dev, host, bytes);
+			kunmap_atomic(kmp, KM_BOUNCE_READ);
+			local_irq_restore(flags);
+			len -= bytes;
+			buffer.page++;
+			buffer.offset = 0;
+		}
+	} else {
+		void *v = page_address(buffer.page) + buffer.offset;
+
+		if (dir == DMA_TO_DEVICE)
+			memcpy(dma_addr, v, size);
+		else
+			memcpy(v, dma_addr, size);
+	}
+}
+
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+map_single(struct device *hwdev, struct swiotlb_phys_addr buffer, size_t size, int dir)
 {
 	unsigned long flags;
 	char *dma_addr;
@@ -298,11 +403,16 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 	unsigned long mask;
 	unsigned long offset_slots;
 	unsigned long max_slots;
+	struct swiotlb_phys_addr slot_buf;
 
 	mask = dma_get_seg_boundary(hwdev);
-	start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+	start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
 
 	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+
+	/*
+	 * Carefully handle integer overflow which can occur when mask == ~0UL.
+	 */
 	max_slots = mask + 1
 		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
 		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
@@ -378,10 +488,15 @@ found:
 	 * This is needed when we sync the memory. Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
+	slot_buf = buffer;
+	for (i = 0; i < nslots; i++) {
+		slot_buf.page += slot_buf.offset >> PAGE_SHIFT;
+		slot_buf.offset &= PAGE_SIZE - 1;
+		io_tlb_orig_addr[index+i] = slot_buf;
+		slot_buf.offset += 1 << IO_TLB_SHIFT;
+	}
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		memcpy(dma_addr, buffer, size);
+		__sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
 
 	return dma_addr;
 }
@@ -395,17 +510,17 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
+	struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
 
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+	if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
 		/*
 		 * bounce... copy the data back into the original buffer * and
 		 * delete the bounce buffer.
 		 */
-		memcpy(buffer, dma_addr, size);
+		__sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -437,21 +552,18 @@ static void
 sync_single(struct device *hwdev, char *dma_addr, size_t size,
 	    int dir, int target)
 {
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
-
-	buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+	struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr);
 
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			memcpy(buffer, dma_addr, size);
+			__sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			memcpy(dma_addr, buffer, size);
+			__sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
 		break;
@@ -473,7 +585,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+	if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -488,13 +600,16 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * swiotlb_map_single(), which will grab memory from
 		 * the lowest available address range.
 		 */
-		ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+		struct swiotlb_phys_addr buffer;
+		buffer.page = virt_to_page(NULL);
+		buffer.offset = 0;
+		ret = map_single(hwdev, buffer, size, DMA_FROM_DEVICE);
 		if (!ret)
 			return NULL;
 	}
 
 	memset(ret, 0, size);
-	dev_addr = virt_to_bus(ret);
+	dev_addr = swiotlb_virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
 	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
@@ -554,8 +669,9 @@ dma_addr_t
 swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 			 int dir, struct dma_attrs *attrs)
 {
-	dma_addr_t dev_addr = virt_to_bus(ptr);
+	dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
 	void *map;
+	struct swiotlb_phys_addr buffer;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -563,19 +679,22 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) &&
+	    !range_needs_mapping(ptr, size))
 		return dev_addr;
 
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
-	map = map_single(hwdev, ptr, size, dir);
+	buffer.page = virt_to_page(ptr);
+	buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
+	map = map_single(hwdev, buffer, size, dir);
 	if (!map) {
 		swiotlb_full(hwdev, size, dir, 1);
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = virt_to_bus(map);
+	dev_addr = swiotlb_virt_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
@@ -605,7 +724,7 @@ void
 swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 			   size_t size, int dir, struct dma_attrs *attrs)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -635,7 +754,7 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -666,7 +785,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr) + offset;
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -714,18 +833,20 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		     int dir, struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
-	void *addr;
+	struct swiotlb_phys_addr buffer;
 	dma_addr_t dev_addr;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_bus(addr);
-		if (swiotlb_force ||
+		dev_addr = swiotlb_sg_to_bus(sg);
+		if (range_needs_mapping(sg_virt(sg), sg->length) ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
-			void *map = map_single(hwdev, addr, sg->length, dir);
+			void *map;
+			buffer.page = sg_page(sg);
+			buffer.offset = sg->offset;
+			map = map_single(hwdev, buffer, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
@@ -735,7 +856,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = virt_to_bus(map);
+			sg->dma_address = swiotlb_virt_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
@@ -765,11 +886,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			unmap_single(hwdev, bus_to_virt(sg->dma_address),
+		if (sg->dma_address != swiotlb_sg_to_bus(sg))
+			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
 	}
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
775EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); 896EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -798,11 +919,11 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, bus_to_virt(sg->dma_address),
+		if (sg->dma_address != swiotlb_sg_to_bus(sg))
+			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
 	}
 }
 
@@ -823,7 +944,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+	return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
 }
 
 /*
@@ -835,7 +956,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return virt_to_bus(io_tlb_end - 1) <= mask;
+	return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
 }
 
 EXPORT_SYMBOL(swiotlb_map_single);
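
One subtle point in the map_single() changes above is the max_slots computation: when the segment boundary mask is ~0UL, mask + 1 wraps to 0, so the code must special-case that instead of feeding 0 into ALIGN(). A standalone sketch of the wraparound; IO_TLB_SHIFT and the ALIGN macro are copied in here just for the demo:

#include <stdio.h>

#define IO_TLB_SHIFT 11
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long mask = ~0UL;	/* "no boundary" from dma_get_seg_boundary() */
	unsigned long max_slots;

	/* mask + 1 == 0 here, so fall back to the widest representable count */
	max_slots = mask + 1
		? ALIGN(mask + 1, 1UL << IO_TLB_SHIFT) >> IO_TLB_SHIFT
		: 1UL << (8 * sizeof(unsigned long) - IO_TLB_SHIFT);

	printf("mask + 1 = %lu, max_slots = %lu\n", mask + 1, max_slots);
	return 0;
}
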
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..fc031d68327e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
 	}
 	up_read(&current->mm->mmap_sem);
 }
+
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void)
+{
+	might_sleep();
+	/*
+	 * it would be nicer only to annotate paths which are not under
+	 * pagefault_disable, however that requires a larger audit and
+	 * providing helpers like get_user_atomic.
+	 */
+	if (!in_atomic() && current->mm)
+		might_lock_read(&current->mm->mmap_sem);
+}
+EXPORT_SYMBOL(might_fault);
+#endif
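
With might_fault() defined as above under CONFIG_PROVE_LOCKING, a user-copy helper can announce that it may sleep and take mmap_sem for reading, letting lockdep flag callers that hold mmap_sem incompatibly. A hedged kernel-style sketch of how such a helper might use it; my_copy_from_user is a hypothetical name, not something this patch adds:

#include <linux/kernel.h>
#include <linux/uaccess.h>

static inline unsigned long
my_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	might_fault();	/* may sleep; lockdep records a read of mmap_sem */
	return copy_from_user(to, from, n);
}
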
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ba537fae0a4c..ce68e046d963 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1786,8 +1786,6 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
 	if (err < 0)
 		return;
 
-	__module_get(nsock->ops->owner);
-
 	/* Set our callbacks */
 	nsock->sk->sk_data_ready = rfcomm_l2data_ready;
 	nsock->sk->sk_state_change = rfcomm_l2state_change;
diff --git a/net/socket.c b/net/socket.c
index 92764d836891..76ba80aeac1a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2307,6 +2307,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 	}
 
 	(*newsock)->ops = sock->ops;
+	__module_get((*newsock)->ops->owner);
 
 done:
 	return err;
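
Taken together, the two hunks above move the ops->owner reference from the RFCOMM caller into kernel_accept() itself, so every in-kernel user gets the protocol module pinned for the lifetime of the accepted socket, and sock_release() drops it. A hedged sketch of a caller under the new convention; my_accept_one is a hypothetical helper:

#include <linux/net.h>

static int my_accept_one(struct socket *listener, struct socket **newsock)
{
	int err = kernel_accept(listener, newsock, 0);

	if (err < 0)
		return err;
	/* ... use *newsock; its ops->owner is already pinned ... */
	sock_release(*newsock);	/* drops the module reference, too */
	return 0;
}
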
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 5dd3e89f620a..b77f330d2650 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -69,6 +69,7 @@ enum {
 };
 
 enum {
+	STAC_92HD73XX_NO_JD, /* no jack-detection */
 	STAC_92HD73XX_REF,
 	STAC_DELL_M6_AMIC,
 	STAC_DELL_M6_DMIC,
@@ -127,6 +128,7 @@ enum {
 };
 
 enum {
+	STAC_D965_REF_NO_JD, /* no jack-detection */
 	STAC_D965_REF,
 	STAC_D965_3ST,
 	STAC_D965_5ST,
@@ -857,6 +859,7 @@ static struct hda_verb stac92hd83xxx_core_init[] = {
 
 	/* power state controls amps */
 	{ 0x01, AC_VERB_SET_EAPD, 1 << 2},
+	{}
 };
 
 static struct hda_verb stac92hd71bxx_core_init[] = {
@@ -868,6 +871,7 @@ static struct hda_verb stac92hd71bxx_core_init[] = {
 	{ 0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
 	{ 0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
 	{ 0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{}
 };
 
 #define HD_DISABLE_PORTF 2
@@ -1611,6 +1615,7 @@ static unsigned int *stac92hd73xx_brd_tbl[STAC_92HD73XX_MODELS] = {
 };
 
 static const char *stac92hd73xx_models[STAC_92HD73XX_MODELS] = {
+	[STAC_92HD73XX_NO_JD] = "no-jd",
 	[STAC_92HD73XX_REF] = "ref",
 	[STAC_DELL_M6_AMIC] = "dell-m6-amic",
 	[STAC_DELL_M6_DMIC] = "dell-m6-dmic",
@@ -1640,6 +1645,8 @@ static struct snd_pci_quirk stac92hd73xx_cfg_tbl[] = {
1640 "unknown Dell", STAC_DELL_M6_DMIC), 1645 "unknown Dell", STAC_DELL_M6_DMIC),
1641 SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x029f, 1646 SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x029f,
1642 "Dell Studio 1537", STAC_DELL_M6_DMIC), 1647 "Dell Studio 1537", STAC_DELL_M6_DMIC),
1648 SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02a0,
1649 "Dell Studio 17", STAC_DELL_M6_DMIC),
1643 {} /* terminator */ 1650 {} /* terminator */
1644}; 1651};
1645 1652
@@ -1662,6 +1669,7 @@ static struct snd_pci_quirk stac92hd83xxx_cfg_tbl[] = {
 	/* SigmaTel reference board */
 	SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668,
 		      "DFI LanParty", STAC_92HD71BXX_REF),
+	{} /* terminator */
 };
 
 static unsigned int ref92hd71bxx_pin_configs[11] = {
@@ -2027,6 +2035,7 @@ static unsigned int dell_3st_pin_configs[14] = {
 };
 
 static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
+	[STAC_D965_REF_NO_JD] = ref927x_pin_configs,
 	[STAC_D965_REF] = ref927x_pin_configs,
 	[STAC_D965_3ST] = d965_3st_pin_configs,
 	[STAC_D965_5ST] = d965_5st_pin_configs,
@@ -2035,6 +2044,7 @@ static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
 };
 
 static const char *stac927x_models[STAC_927X_MODELS] = {
+	[STAC_D965_REF_NO_JD] = "ref-no-jd",
 	[STAC_D965_REF] = "ref",
 	[STAC_D965_3ST] = "3stack",
 	[STAC_D965_5ST] = "5stack",
@@ -2896,7 +2906,7 @@ static int stac92xx_auto_create_multi_out_ctls(struct hda_codec *codec,
 	}
 
 	if ((spec->multiout.num_dacs - cfg->line_outs) > 0 &&
-			cfg->hp_outs && !spec->multiout.hp_nid)
+			cfg->hp_outs == 1 && !spec->multiout.hp_nid)
 		spec->multiout.hp_nid = nid;
 
 	if (cfg->hp_outs > 1 && cfg->line_out_type == AUTO_PIN_LINE_OUT) {
@@ -4254,14 +4264,17 @@ again:
 
 	switch (spec->multiout.num_dacs) {
 	case 0x3: /* 6 Channel */
+		spec->multiout.hp_nid = 0x17;
 		spec->mixer = stac92hd73xx_6ch_mixer;
 		spec->init = stac92hd73xx_6ch_core_init;
 		break;
 	case 0x4: /* 8 Channel */
+		spec->multiout.hp_nid = 0x18;
 		spec->mixer = stac92hd73xx_8ch_mixer;
 		spec->init = stac92hd73xx_8ch_core_init;
 		break;
 	case 0x5: /* 10 Channel */
+		spec->multiout.hp_nid = 0x19;
 		spec->mixer = stac92hd73xx_10ch_mixer;
 		spec->init = stac92hd73xx_10ch_core_init;
 	};
@@ -4297,6 +4310,7 @@ again:
 		spec->amp_nids = &stac92hd73xx_amp_nids[DELL_M6_AMP];
 		spec->eapd_switch = 0;
 		spec->num_amps = 1;
+		spec->multiout.hp_nid = 0;	/* dual HPs */
 
 		if (!spec->init)
 			spec->init = dell_m6_core_init;
@@ -4351,6 +4365,9 @@ again:
 			return err;
 	}
 
+	if (spec->board_config == STAC_92HD73XX_NO_JD)
+		spec->hp_detect = 0;
+
 	codec->patch_ops = stac92xx_patch_ops;
 
 	return 0;
@@ -4899,6 +4916,10 @@ static int patch_stac927x(struct hda_codec *codec)
 	 */
 	codec->bus->needs_damn_long_delay = 1;
 
+	/* no jack detecion for ref-no-jd model */
+	if (spec->board_config == STAC_D965_REF_NO_JD)
+		spec->hp_detect = 0;
+
 	return 0;
 }
 
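
Several of the hunks above only add a {} entry to hda_verb tables; without it, the verb-sequence walker has no zeroed sentinel and reads past the end of the array. A standalone sketch of sentinel-terminated iteration; struct verb and the values are illustrative, not the HDA definitions:

#include <stdio.h>

struct verb { unsigned int nid, verb, param; };

static const struct verb init_verbs[] = {
	{ 0x0a, 0x300, 0x7000 },
	{ 0x0d, 0x300, 0x7000 },
	{}			/* zeroed sentinel: stops the walk below */
};

int main(void)
{
	const struct verb *v;

	for (v = init_verbs; v->nid; v++)
		printf("nid 0x%02x verb 0x%x param 0x%x\n",
		       v->nid, v->verb, v->param);
	return 0;
}
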
diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
index e9084fdd2082..acd68efb2b75 100644
--- a/sound/soc/omap/omap-pcm.c
+++ b/sound/soc/omap/omap-pcm.c
@@ -233,7 +233,7 @@ static int omap_pcm_open(struct snd_pcm_substream *substream)
 	if (ret < 0)
 		goto out;
 
-	prtd = kzalloc(sizeof(prtd), GFP_KERNEL);
+	prtd = kzalloc(sizeof(*prtd), GFP_KERNEL);
 	if (prtd == NULL) {
 		ret = -ENOMEM;
 		goto out;
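
The one-liner above fixes a classic C allocation bug: sizeof(prtd) is the size of the pointer (4 or 8 bytes), while sizeof(*prtd) is the size of the structure it points to, so the original call allocated far too little memory. A runnable demonstration; struct omap_runtime_data here is a stand-in, not the driver's real layout:

#include <stdio.h>
#include <stdlib.h>

struct omap_runtime_data {	/* stand-in fields */
	int dma_ch;
	unsigned long period_bytes;
};

int main(void)
{
	struct omap_runtime_data *prtd;

	printf("sizeof(prtd)  = %zu (pointer only: the bug)\n", sizeof(prtd));
	printf("sizeof(*prtd) = %zu (whole struct: the fix)\n", sizeof(*prtd));

	prtd = calloc(1, sizeof(*prtd));	/* the corrected allocation */
	free(prtd);
	return 0;
}
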