165 files changed, 5763 insertions, 2060 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index a851118775d8..6a8c73f55b80 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -1,18 +1,22 @@
 CONFIG_RCU_TRACE debugfs Files and Formats
 
 
-The rcutree implementation of RCU provides debugfs trace output that
-summarizes counters and state.  This information is useful for debugging
-RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats.
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.
 
 
-Hierarchical RCU debugfs Files and Formats
+CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-This implementation of RCU provides three debugfs files under the
+These implementations of RCU provide five debugfs files under the
 top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcugp (which displays grace-period counters), and
-rcu/rcuhier (which displays the struct rcu_node hierarchy).
+rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
+rcu/rcudata), rcu/rcugp (which displays grace-period counters),
+rcu/rcuhier (which displays the struct rcu_node hierarchy), and
+rcu/rcu_pending (which displays counts of the reasons that the
+rcu_pending() function decided that there was core RCU work to do).
 
 The output of "cat rcu/rcudata" looks as follows:
 
@@ -130,7 +134,8 @@ o "ci" is the number of RCU callbacks that have been invoked for
 	been registered in absence of CPU-hotplug activity.
 
 o	"co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.
+	this CPU going offline.  These orphaned callbacks have been moved
+	to an arbitrarily chosen online CPU.
 
 o	"ca" is the number of RCU callbacks that have been adopted due to
 	other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o "gpnum" is the number of grace periods that have started.  It is
 
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
 1/1 .>. 0:127 ^0
 3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
 3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
 rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
 0/1 .>. 0:127 ^0
 0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
 0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o "fqlh" is the number of calls to force_quiescent_state() that
 	exited immediately (without even being counted in nfqs above)
 	due to contention on ->fqslock.
 
-o	"oqlen" is the number of callbacks on the "orphan" callback
-	list.  RCU callbacks are placed on this list by CPUs going
-	offline, and are "adopted" either by the CPU helping the outgoing
-	CPU or by the next rcu_barrier*() call, whichever comes first.
-
 o	Each element of the form "1/1 0:127 ^0" represents one struct
 	rcu_node.  Each line represents one level of the hierarchy, from
 	root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o "nn" is the number of times that this CPU needed nothing.  Alert
 	readers will note that the rcu "nn" number for a given CPU very
 	closely matches the rcu_bh "np" number for that same CPU.  This
 	is due to short-circuit evaluation in rcu_pending().
+
+
+CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
+rcu_preempt_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
+             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
+             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
+             exp balk: bt=0 nos=0
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
+rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
+The last three lines of the rcu_preempt section appear only in
+CONFIG_RCU_BOOST kernel builds.  The fields are as follows:
+
+o	"qlen" is the number of RCU callbacks currently waiting either
+	for an RCU grace period or waiting to be invoked.  This is the
+	only field present for rcu_sched and rcu_bh, due to the
+	short-circuiting of grace periods in those two cases.
+
+o	"gp" is the number of grace periods that have completed.
+
+o	"g197/p197/c197" displays the grace-period state, with the
+	"g" number being the number of grace periods that have started
+	(mod 256), the "p" number being the number of grace periods
+	that the CPU has responded to (also mod 256), and the "c"
+	number being the number of grace periods that have completed
+	(once again mod 256).
+
+	Why have both "gp" and "g"?  Because the data flowing into
+	"gp" is only present in a CONFIG_RCU_TRACE kernel.
+
+o	"tasks" is a set of bits.  The first bit is "T" if there are
+	currently tasks that have recently blocked within an RCU
+	read-side critical section, the second bit is "N" if any of the
+	aforementioned tasks are blocking the current RCU grace period,
+	and the third bit is "E" if any of the aforementioned tasks are
+	blocking the current expedited grace period.  Each bit is "."
+	if the corresponding condition does not hold.
+
+o	"ttb" is a single bit.  It is "B" if any of the blocked tasks
+	need to be priority boosted and "." otherwise.
+
+o	"btg" indicates whether boosting has been carried out during
+	the current grace period, with "exp" indicating that boosting
+	is in progress for an expedited grace period, "no" indicating
+	that boosting has not yet started for a normal grace period,
+	"begun" indicating that boosting has begun for a normal grace
+	period, and "done" indicating that boosting has completed for
+	a normal grace period.
+
+o	"ntb" is the total number of tasks subjected to RCU priority
+	boosting since boot.
+
+o	"neb" is the number of expedited grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"nnb" is the number of normal grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+
+o	"bt" is the low-order 12 bits of the value that the jiffies counter
+	will have at the next time that boosting is scheduled to begin.
+
+o	In the line beginning with "normal balk", the fields are as follows:
+
+	o	"nt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+		Note that the system will balk from boosting even if the
+		grace period is overdue when the currently running task
+		is looping within an RCU read-side critical section.
+		There is no point in boosting in this case, because
+		boosting a running task won't make it run any faster.
+
+	o	"gt" is the number of times that the system balked
+		from boosting because, although there were blocked tasks,
+		none of them were preventing the current grace period
+		from completing.
+
+	o	"bt" is the number of times that the system balked
+		from boosting because boosting was already in progress.
+
+	o	"b" is the number of times that the system balked from
+		boosting because boosting had already completed for
+		the grace period in question.
+
+	o	"ny" is the number of times that the system balked from
+		boosting because it was not yet time to start boosting
+		the grace period in question.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.  This can actually happen due to races involving
+		increments of the jiffies counter.
+
+o	In the line beginning with "exp balk", the fields are as follows:
+
+	o	"bt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.
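The "g"/"p"/"c" numbers in the rcutiny output above wrap modulo 256, so "is this CPU behind the current grace period?" must be computed with wraparound-safe arithmetic.  A minimal userspace sketch of that comparison (hypothetical helper, not the kernel's own code):

	/* Wraparound-safe comparison of mod-256 grace-period counters,
	 * e.g. the "g197/p197/c197" field above.  Valid while the two
	 * counters stay within 127 grace periods of each other. */
	#include <stdio.h>

	static int gp_pending(unsigned char g, unsigned char p)
	{
		return (signed char)(g - p) > 0;	/* is g "ahead of" p? */
	}

	int main(void)
	{
		printf("%d\n", gp_pending(197, 197));	/* 0: CPU caught up */
		printf("%d\n", gp_pending(2, 250));	/* 1: g wrapped past p */
		return 0;
	}
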
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index d9bcffd59433..470d3dba1a69 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -62,6 +62,10 @@ aic7*reg_print.c*
 aic7*seq.h*
 aicasm
 aicdb.h*
+altivec1.c
+altivec2.c
+altivec4.c
+altivec8.c
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
@@ -76,6 +80,7 @@ btfixupprep
 build
 bvmlinux
 bzImage*
+capflags.c
 classlist.h*
 comp*.log
 compile.h*
@@ -94,6 +99,7 @@ devlist.h*
 docproc
 elf2ecoff
 elfconfig.h*
+evergreen_reg_safe.h
 fixdep
 flask.h
 fore200e_mkfirm
@@ -108,9 +114,16 @@ genksyms
 *_gray256.c
 ihex2fw
 ikconfig.h*
+inat-tables.c
 initramfs_data.cpio
 initramfs_data.cpio.gz
 initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
 kallsyms
 kconfig
 keywords.c
@@ -140,6 +153,7 @@ mkprep
 mktables
 mktree
 modpost
+modules.builtin
 modules.order
 modversions.h*
 ncscope.*
@@ -153,14 +167,23 @@ pca200e.bin
 pca200e_ecd.bin2
 piggy.gz
 piggyback
+piggy.S
 pnmtologo
 ppc_defs.h*
 pss_boot.h
 qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
 raid6altivec*.c
 raid6int*.c
 raid6tables.c
 relocs
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
 series
 setup
 setup.bin
@@ -169,6 +192,7 @@ sImage
 sm_tbl*
 split-include
 syscalltab.h
+tables.c
 tags
 tftpboot.img
 timeconst.h
@@ -190,6 +214,7 @@ vmlinux
 vmlinux-*
 vmlinux.aout
 vmlinux.lds
+voffset.h
 vsyscall.lds
 vsyscall_32.lds
 wanxlfw.inc
@@ -200,3 +225,4 @@ wakeup.elf
 wakeup.lds
 zImage*
 zconf.hash.c
+zoffset.h
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index 715eaaf1519d..9a8674629a07 100644
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -537,7 +537,7 @@
        Notes: Further information in
        http://www.oreilly.com/catalog/linuxdrive2/
 
-     * Title: "Linux Device Drivers, 3nd Edition"
+     * Title: "Linux Device Drivers, 3rd Edition"
        Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
        Publisher: O'Reilly & Associates.
        Date: 2005.
@@ -592,14 +592,6 @@
        Pages: 600.
        ISBN: 0-13-101908-2
 
-     * Title: "The Design and Implementation of the 4.4 BSD UNIX
-       Operating System"
-       Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
-       John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1996.
-       ISBN: 0-201-54979-4
-
      * Title: "Programming for the real world - POSIX.4"
        Author: Bill O. Gallmeister.
        Publisher: O'Reilly & Associates, Inc..
@@ -610,28 +602,13 @@
        POSIX.  Good reference.
 
      * Title: "UNIX Systems for Modern Architectures: Symmetric
-       Multiprocesssing and Caching for Kernel Programmers"
+       Multiprocessing and Caching for Kernel Programmers"
        Author: Curt Schimmel.
        Publisher: Addison Wesley.
        Date: June, 1994.
        Pages: 432.
        ISBN: 0-201-63338-8
 
-     * Title: "The Design and Implementation of the 4.3 BSD UNIX
-       Operating System"
-       Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
-       Karels, John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1989 (reprinted with corrections on October, 1990).
-       ISBN: 0-201-06196-1
-
-     * Title: "The Design of the UNIX Operating System"
-       Author: Maurice J. Bach.
-       Publisher: Prentice Hall.
-       Date: 1986.
-       Pages: 471.
-       ISBN: 0-13-201757-1
-
      MISCELLANEOUS:
 
      * Name: linux/Documentation
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 992cda68fa63..f3dc951e949f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1614,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.
 
+	noautogroup	Disable scheduler automatic task group creation.
+
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
 			on "Classic" PPC cores.
 
@@ -2459,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			to facilitate early boot debugging.
 			See also Documentation/trace/events.txt
 
-	tsc=		Disable clocksource-must-verify flag for TSC.
+	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
-			disables clocksource verification at runtime.
-			Used to enable high-resolution timer mode on older
-			hardware, and in virtualized environment.
+			disables clocksource verification at runtime, as well
+			as the stability checks done at bootup.  Used to enable
+			high-resolution timer mode on older hardware, and in
+			virtualized environments.
 			[x86] noirqtime: Do not use TSC to do irq accounting.
 			Used to run time disable IRQ_TIME_ACCOUNTING on any
 			platforms where RDTSC is slow and this accounting
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 30b43e1b2697..bdeb81ccb5f6 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -600,6 +600,7 @@ Protocol:	2.07+
   0x00000001	lguest
   0x00000002	Xen
   0x00000003	Moorestown MID
+  0x00000004	CE4100 TV Platform
 
 Field name:	hardware_subarch_data
 Type:		write (subarch-dependent)
diff --git a/MAINTAINERS b/MAINTAINERS
index b1dda78a1e75..c5c7292daba0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2812,6 +2812,10 @@ M:	Thomas Gleixner <tglx@linutronix.de>
 S:	Maintained
 F:	Documentation/timers/
 F:	kernel/hrtimer.c
+F:	kernel/time/clockevents.c
+F:	kernel/time/tick*.*
+F:	kernel/time/timer_*.c
+F:	include/linux/clockevents.h
 F:	include/linux/hrtimer.h
 
 HIGH-SPEED SCC DRIVER FOR AX.25
@@ -5142,6 +5146,18 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/s3c24xx
 
+TIMEKEEPING, NTP
+M:	John Stultz <johnstul@us.ibm.com>
+M:	Thomas Gleixner <tglx@linutronix.de>
+S:	Supported
+F:	include/linux/clocksource.h
+F:	include/linux/time.h
+F:	include/linux/timex.h
+F:	include/linux/timekeeping.h
+F:	kernel/time/clocksource.c
+F:	kernel/time/time*.c
+F:	kernel/time/ntp.c
+
 TLG2300 VIDEO4LINUX-2 DRIVER
 M:	Huang Shijie <shijie8@gmail.com>
 M:	Kang Yong <kangyong@telegent.com>
diff --git a/arch/Kconfig b/arch/Kconfig
index 8bf0fa652eb6..f78c2be4242b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
+config HAVE_ARCH_MUTEX_CPU_RELAX
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e0b98e71ff47..6c6d7b339aae 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,6 +99,7 @@ config S390
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_GET_USER_PAGES_FAST
+	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 458c1f7fbc18..688271f5f2e4 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,3 +7,5 @@
  */
 
 #include <asm-generic/mutex-dec.h>
+
+#define arch_mutex_cpu_relax()	barrier()
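The s390 override above exists because cpu_relax() on s390 yields the cpu to the hypervisor, which is far too heavy for the mutex spinning loop; a plain compiler barrier suffices there.  A sketch of the generic-side pattern this Kconfig symbol enables (assumed shape for illustration; owner_running() is a hypothetical placeholder, not code from this series):

	/* Fall back to the ordinary cpu_relax() unless the architecture
	 * selects HAVE_ARCH_MUTEX_CPU_RELAX and supplies its own: */
	#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
	#define arch_mutex_cpu_relax()	cpu_relax()
	#endif

	/* ...so the adaptive-spin loop relaxes through the hook: */
	while (owner_running(lock, owner))
		arch_mutex_cpu_relax();	/* barrier() on s390, pause on x86 */
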
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e330da21b84f..b6fccb07123e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -377,6 +377,18 @@ config X86_ELAN | |||
377 | 377 | ||
378 | If unsure, choose "PC-compatible" instead. | 378 | If unsure, choose "PC-compatible" instead. |
379 | 379 | ||
380 | config X86_INTEL_CE | ||
381 | bool "CE4100 TV platform" | ||
382 | depends on PCI | ||
383 | depends on PCI_GODIRECT | ||
384 | depends on X86_32 | ||
385 | depends on X86_EXTENDED_PLATFORM | ||
386 | select X86_REBOOTFIXUPS | ||
387 | ---help--- | ||
388 | Select for the Intel CE media processor (CE4100) SOC. | ||
389 | This option compiles in support for the CE4100 SOC for settop | ||
390 | boxes and media devices. | ||
391 | |||
380 | config X86_MRST | 392 | config X86_MRST |
381 | bool "Moorestown MID platform" | 393 | bool "Moorestown MID platform" |
382 | depends on PCI | 394 | depends on PCI |
@@ -385,6 +397,10 @@ config X86_MRST | |||
385 | depends on X86_EXTENDED_PLATFORM | 397 | depends on X86_EXTENDED_PLATFORM |
386 | depends on X86_IO_APIC | 398 | depends on X86_IO_APIC |
387 | select APB_TIMER | 399 | select APB_TIMER |
400 | select I2C | ||
401 | select SPI | ||
402 | select INTEL_SCU_IPC | ||
403 | select X86_PLATFORM_DEVICES | ||
388 | ---help--- | 404 | ---help--- |
389 | Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin | 405 | Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin |
390 | Internet Device(MID) platform. Moorestown consists of two chips: | 406 | Internet Device(MID) platform. Moorestown consists of two chips: |
@@ -466,6 +482,19 @@ config X86_ES7000 | |||
466 | Support for Unisys ES7000 systems. Say 'Y' here if this kernel is | 482 | Support for Unisys ES7000 systems. Say 'Y' here if this kernel is |
467 | supposed to run on an IA32-based Unisys ES7000 system. | 483 | supposed to run on an IA32-based Unisys ES7000 system. |
468 | 484 | ||
485 | config X86_32_IRIS | ||
486 | tristate "Eurobraille/Iris poweroff module" | ||
487 | depends on X86_32 | ||
488 | ---help--- | ||
489 | The Iris machines from EuroBraille do not have APM or ACPI support | ||
490 | to shut themselves down properly. A special I/O sequence is | ||
491 | needed to do so, which is what this module does at | ||
492 | kernel shutdown. | ||
493 | |||
494 | This is only for Iris machines from EuroBraille. | ||
495 | |||
496 | If unused, say N. | ||
497 | |||
469 | config SCHED_OMIT_FRAME_POINTER | 498 | config SCHED_OMIT_FRAME_POINTER |
470 | def_bool y | 499 | def_bool y |
471 | prompt "Single-depth WCHAN output" | 500 | prompt "Single-depth WCHAN output" |
@@ -1141,16 +1170,16 @@ config NUMA | |||
1141 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" | 1170 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" |
1142 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) | 1171 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) |
1143 | 1172 | ||
1144 | config K8_NUMA | 1173 | config AMD_NUMA |
1145 | def_bool y | 1174 | def_bool y |
1146 | prompt "Old style AMD Opteron NUMA detection" | 1175 | prompt "Old style AMD Opteron NUMA detection" |
1147 | depends on X86_64 && NUMA && PCI | 1176 | depends on X86_64 && NUMA && PCI |
1148 | ---help--- | 1177 | ---help--- |
1149 | Enable K8 NUMA node topology detection. You should say Y here if | 1178 | Enable AMD NUMA node topology detection. You should say Y here if |
1150 | you have a multi processor AMD K8 system. This uses an old | 1179 | you have a multi processor AMD system. This uses an old method to |
1151 | method to read the NUMA configuration directly from the builtin | 1180 | read the NUMA configuration directly from the builtin Northbridge |
1152 | Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA | 1181 | of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, |
1153 | instead, which also takes priority if both are compiled in. | 1182 | which also takes priority if both are compiled in. |
1154 | 1183 | ||
1155 | config X86_64_ACPI_NUMA | 1184 | config X86_64_ACPI_NUMA |
1156 | def_bool y | 1185 | def_bool y |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b59ee765414e..45143bbcfe5e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
 	  feature as well as for the change_page_attr() infrastructure.
 	  If in doubt, say "N"
 
+config DEBUG_SET_MODULE_RONX
+	bool "Set loadable kernel module data as NX and text as RO"
+	depends on MODULES
+	---help---
+	  This option helps catch unintended modifications to loadable
+	  kernel module's text and read-only data.  It also prevents execution
+	  of module data.  Such protection may interfere with run-time code
+	  patching and dynamic kernel tracing - and it might also protect
+	  against certain classes of kernel exploits.
+	  If in doubt, say "N".
+
 config DEBUG_NX_TEST
 	tristate "Testcase for the NX non-executable stack feature"
 	depends on DEBUG_KERNEL && m
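What enabling DEBUG_SET_MODULE_RONX implies, sketched with the x86 set_memory API (the actual wiring lives in other patches of this series; the variables and page counts below are illustrative placeholders, not this patch's code):

	unsigned long core = (unsigned long)mod->module_core;

	set_memory_ro(core, text_pages);		/* module text: read-only  */
	set_memory_nx(core + text_size, data_pages);	/* module data: no-execute */
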
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 52f85a196fa0..35af09d13dc1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -182,7 +182,7 @@ no_longmode:
 	hlt
 	jmp     1b
 
-#include "../../kernel/verify_cpu_64.S"
+#include "../../kernel/verify_cpu.S"
 
 /*
  * Be careful here startup_64 needs to be at a predictable
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4a2adaa9aefc..13009d1af99a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
 extern int alternatives_text_reserved(void *start, void *end);
+extern bool skip_smp_alternatives;
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index c8517f81b21e..6aee50d655d1 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,36 +3,53 @@
 
 #include <linux/pci.h>
 
-extern struct pci_device_id k8_nb_ids[];
+extern struct pci_device_id amd_nb_misc_ids[];
 struct bootnode;
 
-extern int early_is_k8_nb(u32 value);
-extern int cache_k8_northbridges(void);
-extern void k8_flush_garts(void);
-extern int k8_get_nodes(struct bootnode *nodes);
-extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int k8_scan_nodes(void);
+extern int early_is_amd_nb(u32 value);
+extern int amd_cache_northbridges(void);
+extern void amd_flush_garts(void);
+extern int amd_get_nodes(struct bootnode *nodes);
+extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
+extern int amd_scan_nodes(void);
 
-struct k8_northbridge_info {
+struct amd_northbridge {
+	struct pci_dev *misc;
+};
+
+struct amd_northbridge_info {
 	u16 num;
-	u8 gart_supported;
-	struct pci_dev **nb_misc;
+	u64 flags;
+	struct amd_northbridge *nb;
 };
-extern struct k8_northbridge_info k8_northbridges;
+extern struct amd_northbridge_info amd_northbridges;
+
+#define AMD_NB_GART			0x1
+#define AMD_NB_L3_INDEX_DISABLE		0x2
 
 #ifdef CONFIG_AMD_NB
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline int amd_nb_num(void)
 {
-	return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
+	return amd_northbridges.num;
 }
 
-#else
+static inline int amd_nb_has_feature(int feature)
+{
+	return ((amd_northbridges.flags & feature) == feature);
+}
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline struct amd_northbridge *node_to_amd_nb(int node)
 {
-	return NULL;
+	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
+
+#else
+
+#define amd_nb_num(x)		0
+#define amd_nb_has_feature(x)	false
+#define node_to_amd_nb(x)	NULL
+
 #endif
 
 
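A hedged usage sketch of the renamed interface above (an assumed call site, not part of this patch): the per-node "misc" PCI function is now reached through struct amd_northbridge, and GART support becomes a flag bit in place of the old gart_supported field.  The register offset is illustrative only.

	u32 val;
	int i;

	if (!amd_nb_has_feature(AMD_NB_GART))
		return;				/* flag replaces gart_supported */

	for (i = 0; i < amd_nb_num(); i++) {
		struct amd_northbridge *nb = node_to_amd_nb(i);

		if (nb && nb->misc)		/* the cached "misc" PCI device */
			pci_read_config_dword(nb->misc, 0x9c, &val);
	}
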
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f6ce0bda3b98..cf12007796db 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
 extern void enable_NMI_through_LVT0(void);
+extern int apic_force_enable(void);
 
 /*
  * On 32bit this is mach-xxx local
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca461fb0..47a30ff8e517 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
 #else
 # define MAX_IO_APICS 128
 # define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 8e6218550e77..c8bfe63a06de 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -124,6 +124,7 @@ enum {
 	X86_SUBARCH_LGUEST,
 	X86_SUBARCH_XEN,
 	X86_SUBARCH_MRST,
+	X86_SUBARCH_CE4100,
 	X86_NR_SUBARCHS,
 };
 
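A hypothetical sketch of how early boot code can branch on the extended enum (assumed call-site shape, shown for illustration; the real dispatch for this series lives elsewhere):

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_MRST:
		x86_mrst_early_setup();
		break;
	case X86_SUBARCH_CE4100:
		x86_ce4100_early_setup();	/* declared in <asm/setup.h> below */
		break;
	default:
		break;
	}
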
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9479a037419f..0141b234406f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -117,6 +117,10 @@ enum fixed_addresses {
 	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
 	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 	__end_of_permanent_fixed_addresses,
+
+#ifdef CONFIG_X86_MRST
+	FIX_LNW_VRTC,
+#endif
 	/*
 	 * 256 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 4aa2bb3b242a..ef328901c802 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	int err;
 
 	/* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+	asm volatile("1:  fxrstorq %[fx]\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [err] "=r" (err)
+		     : [fx] "m" (*fx), "0" (0));
+#else
 	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err)
 		     : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
 	return err;
 }
 
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		return -EFAULT;
 
 	/* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+	asm volatile("1:  fxsaveq %[fx]\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [err] "=r" (err), [fx] "=m" (*fx)
+		     : "0" (0));
+#else
 	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err), "=m" (*fx)
 		     : [fx] "R" (fx), "0" (0));
+#endif
 	if (unlikely(err) &&
 	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
 		err = -EFAULT;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a6b28d017c2f..0c5ca4e30d7b 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
 		 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_init_mappings(void);
+extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 
-extern void probe_nr_irqs_gsi(void);
 extern int get_nr_irqs_gsi(void);
-
 extern void setup_ioapic_ids_from_mpc(void);
+extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
 struct mp_ioapic_gsi{
 	u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void)	{ }
+static inline void ioapic_and_gsi_init(void) { }
 static inline void ioapic_insert_resources(void) { }
-static inline void probe_nr_irqs_gsi(void)	{ }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c62c13cb9788..eb16e94ae04f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
 #ifdef CONFIG_X86_THERMAL_VECTOR
 extern void mcheck_intel_therm_init(void);
 #else
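A hedged consumer sketch for the new hook (hypothetical module code, not in this patch): a platform driver can point platform_thermal_notify at its handler; the handler is assumed to receive the thermal status MSR value, in which the new THERM_STATUS_THRESHOLD0/1 bits (see the msr-index.h hunk below) can be tested.

	static int my_thermal_notify(__u64 msr_val)
	{
		if (msr_val & THERM_STATUS_THRESHOLD1)
			;	/* react to the core crossing threshold 1 */
		return 0;
	}

	static int __init my_driver_init(void)
	{
		platform_thermal_notify = my_thermal_notify;
		return 0;
	}
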
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b501e22a..24215072d0e1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
+
+static inline void get_ucode_data(void *to, const u8 *from, size_t n)
+{
+	memcpy(to, from, n);
+}
+
 #else
 static inline struct microcode_ops * __init init_amd_microcode(void)
 {
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e9f905..0c90dd9f0505 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -5,8 +5,9 @@
 
 #include <asm/mpspec_def.h>
 #include <asm/x86_init.h>
+#include <asm/apicdef.h>
 
-extern int apic_version[MAX_APICS];
+extern int apic_version[];
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
 				 int active_high_low);
 #endif /* CONFIG_ACPI */
 
-#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
+#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)
 
 struct physid_mask {
 	unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
 	test_and_set_bit(physid, (map).mask)
 
 #define physids_and(dst, src1, src2) \
-	bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+	bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_or(dst, src1, src2) \
-	bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+	bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_clear(map) \
-	bitmap_zero((map).mask, MAX_APICS)
+	bitmap_zero((map).mask, MAX_LOCAL_APIC)
 
 #define physids_complement(dst, src) \
-	bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+	bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
 
 #define physids_empty(map) \
-	bitmap_empty((map).mask, MAX_APICS)
+	bitmap_empty((map).mask, MAX_LOCAL_APIC)
 
 #define physids_equal(map1, map2) \
-	bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+	bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
 
 #define physids_weight(map) \
-	bitmap_weight((map).mask, MAX_APICS)
+	bitmap_weight((map).mask, MAX_LOCAL_APIC)
 
 #define physids_shift_right(d, s, n) \
-	bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+	bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 #define physids_shift_left(d, s, n) \
-	bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+	bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 static inline unsigned long physids_coerce(physid_mask_t *map)
 {
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
 	map->mask[0] = physids;
 }
 
-/* Note: will create very large stack frames if physid_mask_t is big */
-#define physid_mask_of_physid(physid)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		physid_set(physid, __physid_mask);			\
-		__physid_mask;						\
-	})
-
 static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
 {
 	physids_clear(*map);
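The removed physid_mask_of_physid() returned the whole mask by value, which is why its comment warned about large stack frames: with MAX_LOCAL_APIC at 32768 the mask is 4KB.  A standalone userspace model of the old-versus-new calling convention (illustrative only, not kernel code):

	#include <string.h>

	#define NBITS	32768	/* models MAX_LOCAL_APIC on 64-bit */
	typedef struct {
		unsigned long mask[NBITS / (8 * sizeof(unsigned long))];
	} mask_t;

	/* old style: a 4KB struct travels through the return value */
	static mask_t mask_of(int id)
	{
		mask_t m;

		memset(&m, 0, sizeof(m));
		m.mask[id / 64] |= 1UL << (id % 64);	/* assumes 64-bit longs */
		return m;
	}

	/* new style: the caller supplies storage, nothing big is copied */
	static void set_mask_of(int id, mask_t *m)
	{
		memset(m, 0, sizeof(*m));
		m->mask[id / 64] |= 1UL << (id % 64);
	}
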
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 4a7f96d7c188..c0a955a9a087 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -15,13 +15,6 @@
 
 #ifdef CONFIG_X86_32
 # define MAX_MPC_ENTRY 1024
-# define MAX_APICS 256
-#else
-# if NR_CPUS <= 255
-#  define MAX_APICS 255
-# else
-#  define MAX_APICS 32768
-# endif
 #endif
 
 /* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644
index 000000000000..73668abdbedf
--- /dev/null
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -0,0 +1,9 @@
+#ifndef _MRST_VRTC_H
+#define _MRST_VRTC_H
+
+extern unsigned char vrtc_cmos_read(unsigned char reg);
+extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
+extern unsigned long vrtc_get_time(void);
+extern int vrtc_set_mmss(unsigned long nowtime);
+
+#endif
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 4a711a684b17..719f00b28ff5 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -14,7 +14,9 @@
 #include <linux/sfi.h>
 
 extern int pci_mrst_init(void);
-int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int sfi_mrtc_num;
+extern struct sfi_rtc_table_entry sfi_mrtc_array[];
 
 /*
  * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
 
 extern struct console early_hsu_console;
 extern void hsu_early_console_init(void);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
+/* VRTC timer */
+#define MRST_VRTC_MAP_SZ	(1024)
+/*#define MRST_VRTC_PGOFFSET	(0xc00) */
+
+extern void mrst_rtc_init(void);
+
 #endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 86030f63ba02..4d0dfa0d998e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -257,6 +257,18 @@
 #define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
 
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE	(1 << 15)
+#define THERM_SHIFT_THRESHOLD0		8
+#define THERM_MASK_THRESHOLD0		(0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE	(1 << 23)
+#define THERM_SHIFT_THRESHOLD1		16
+#define THERM_MASK_THRESHOLD1		(0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0	(1 << 6)
+#define THERM_LOG_THRESHOLD0		(1 << 7)
+#define THERM_STATUS_THRESHOLD1	(1 << 8)
+#define THERM_LOG_THRESHOLD1		(1 << 9)
+
 /* MISC_ENABLE bits: architectural */
 #define MSR_IA32_MISC_ENABLE_FAST_STRING	(1ULL << 0)
 #define MSR_IA32_MISC_ENABLE_TCC		(1ULL << 1)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ef9975812c77..7709c12431b8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -112,7 +112,7 @@ static inline void arch_safe_halt(void)
 
 static inline void halt(void)
 {
-	PVOP_VCALL0(pv_irq_ops.safe_halt);
+	PVOP_VCALL0(pv_irq_ops.halt);
 }
 
 static inline void wbinvd(void)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ca0437c714b2..676129229630 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
 
 #define PCIBIOS_MIN_CARDBUS_IO	0x4000
 
+extern int pcibios_enabled;
 void pcibios_config_init(void);
 struct pci_bus *pcibios_scan_root(int bus);
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d6763b139a84..db8aa19a08a2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
 static inline void x86_mrst_early_setup(void) { }
 #endif
 
+#ifdef CONFIG_X86_INTEL_CE
+extern void x86_ce4100_early_setup(void);
+#else
+static inline void x86_ce4100_early_setup(void) { }
+#endif
+
 #ifndef _SETUP
 
 /*
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 42d412fd8b02..ce1d54c8a433 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -26,20 +26,22 @@ | |||
26 | * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, | 26 | * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, |
27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. |
28 | * | 28 | * |
29 | * We will use 31 sets, one for sending BAU messages from each of the 32 | 29 | * We will use one set for sending BAU messages from each of the |
30 | * cpu's on the uvhub. | 30 | * cpu's on the uvhub. |
31 | * | 31 | * |
32 | * TLB shootdown will use the first of the 8 descriptors of each set. | 32 | * TLB shootdown will use the first of the 8 descriptors of each set. |
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define MAX_CPUS_PER_UVHUB 64 | ||
37 | #define MAX_CPUS_PER_SOCKET 32 | ||
38 | #define UV_ADP_SIZE 64 /* hardware-provided max. */ | ||
39 | #define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ | ||
36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 40 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
37 | /* the 'throttle' to prevent the hardware stay-busy bug */ | 41 | /* the 'throttle' to prevent the hardware stay-busy bug */ |
38 | #define MAX_BAU_CONCURRENT 3 | 42 | #define MAX_BAU_CONCURRENT 3 |
39 | #define UV_CPUS_PER_ACT_STATUS 32 | ||
40 | #define UV_ACT_STATUS_MASK 0x3 | 43 | #define UV_ACT_STATUS_MASK 0x3 |
41 | #define UV_ACT_STATUS_SIZE 2 | 44 | #define UV_ACT_STATUS_SIZE 2 |
42 | #define UV_ADP_SIZE 32 | ||
43 | #define UV_DISTRIBUTION_SIZE 256 | 45 | #define UV_DISTRIBUTION_SIZE 256 |
44 | #define UV_SW_ACK_NPENDING 8 | 46 | #define UV_SW_ACK_NPENDING 8 |
45 | #define UV_NET_ENDPOINT_INTD 0x38 | 47 | #define UV_NET_ENDPOINT_INTD 0x38 |
@@ -100,7 +102,6 @@ | |||
100 | * number of destination side software ack resources | 102 | * number of destination side software ack resources |
101 | */ | 103 | */ |
102 | #define DEST_NUM_RESOURCES 8 | 104 | #define DEST_NUM_RESOURCES 8 |
103 | #define MAX_CPUS_PER_NODE 32 | ||
104 | /* | 105 | /* |
105 | * completion statuses for sending a TLB flush message | 106 | * completion statuses for sending a TLB flush message |
106 | */ | 107 | */ |
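
Annotation: the constants above change the descriptor layout — UV_ADP_SIZE grows from 32 to 64 activation descriptor sets, so every cpu on a 64-cpu uvhub can own a private set. A standalone sketch of the resulting offsets (plain user-space C; constants copied from the hunk, the 64-byte descriptor size taken from the comment block above):

	#include <stdio.h>

	#define UV_ADP_SIZE             64	/* hardware-provided max. */
	#define UV_ITEMS_PER_DESCRIPTOR  8
	#define DESC_BYTES              64	/* per the comment: 8*64 = 512 bytes/set */

	int main(void)
	{
		int cpu;

		/* each cpu's set starts 512 bytes after the previous one */
		for (cpu = 0; cpu < 4; cpu++)
			printf("cpu %2d: descriptor set %2d at BASE + %4d bytes\n",
			       cpu, cpu, cpu * UV_ITEMS_PER_DESCRIPTOR * DESC_BYTES);
		return 0;
	}
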
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1e994754d323..34244b2cd880 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | |||
85 | obj-$(CONFIG_KGDB) += kgdb.o | 85 | obj-$(CONFIG_KGDB) += kgdb.o |
86 | obj-$(CONFIG_VM86) += vm86_32.o | 86 | obj-$(CONFIG_VM86) += vm86_32.o |
87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
88 | obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o | ||
89 | 88 | ||
90 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 89 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
91 | obj-$(CONFIG_APB_TIMER) += apb_timer.o | 90 | obj-$(CONFIG_APB_TIMER) += apb_timer.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 71232b941b6c..17c8090fabd4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) | |||
198 | { | 198 | { |
199 | unsigned int ver = 0; | 199 | unsigned int ver = 0; |
200 | 200 | ||
201 | if (id >= (MAX_LOCAL_APIC-1)) { | ||
202 | printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); | ||
203 | return; | ||
204 | } | ||
205 | |||
201 | if (!enabled) { | 206 | if (!enabled) { |
202 | ++disabled_cpus; | 207 | ++disabled_cpus; |
203 | return; | 208 | return; |
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
910 | acpi_register_lapic_address(acpi_lapic_addr); | 915 | acpi_register_lapic_address(acpi_lapic_addr); |
911 | 916 | ||
912 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, | 917 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, |
913 | acpi_parse_sapic, MAX_APICS); | 918 | acpi_parse_sapic, MAX_LOCAL_APIC); |
914 | 919 | ||
915 | if (!count) { | 920 | if (!count) { |
916 | x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, | 921 | x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, |
917 | acpi_parse_x2apic, MAX_APICS); | 922 | acpi_parse_x2apic, MAX_LOCAL_APIC); |
918 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, | 923 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, |
919 | acpi_parse_lapic, MAX_APICS); | 924 | acpi_parse_lapic, MAX_LOCAL_APIC); |
920 | } | 925 | } |
921 | if (!count && !x2count) { | 926 | if (!count && !x2count) { |
922 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); | 927 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); |
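
Annotation: two things happen in this hunk — MADT parsing switches from MAX_APICS to MAX_LOCAL_APIC, and acpi_register_lapic() gains a bounds check so an oversized apicid can no longer index past apicid-sized tables such as apic_version[MAX_LOCAL_APIC] (resized later in this patch). A standalone sketch of the guard; the MAX_LOCAL_APIC value is an assumption taken from apicdef.h of this era:

	#include <stdio.h>

	#define MAX_LOCAL_APIC 32768	/* assumed apicdef.h value, for illustration */

	static int acpi_register_lapic_sketch(int id)
	{
		/* same guard as the hunk: refuse ids that would overrun
		 * apicid-indexed arrays before any state is touched */
		if (id >= (MAX_LOCAL_APIC - 1)) {
			printf("skipped apicid %d that is too big\n", id);
			return -1;
		}
		printf("registered apicid %d\n", id);
		return 0;
	}

	int main(void)
	{
		acpi_register_lapic_sketch(7);
		acpi_register_lapic_sketch(MAX_LOCAL_APIC);
		return 0;
	}
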
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 553d0b0d639b..123608531c8f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) | |||
353 | mutex_unlock(&smp_alt); | 353 | mutex_unlock(&smp_alt); |
354 | } | 354 | } |
355 | 355 | ||
356 | bool skip_smp_alternatives; | ||
356 | void alternatives_smp_switch(int smp) | 357 | void alternatives_smp_switch(int smp) |
357 | { | 358 | { |
358 | struct smp_alt_module *mod; | 359 | struct smp_alt_module *mod; |
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp) | |||
368 | printk("lockdep: fixing up alternatives.\n"); | 369 | printk("lockdep: fixing up alternatives.\n"); |
369 | #endif | 370 | #endif |
370 | 371 | ||
371 | if (noreplace_smp || smp_alt_once) | 372 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) |
372 | return; | 373 | return; |
373 | BUG_ON(!smp && (num_online_cpus() > 1)); | 374 | BUG_ON(!smp && (num_online_cpus() > 1)); |
374 | 375 | ||
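
Annotation: skip_smp_alternatives gives callers a way to park the SMP-alternatives machinery while the online-cpu count changes transiently, so kernel text is not repatched on every hotplug step. A hedged fragment of a plausible caller — the function name and suspend context are invented for illustration:

	extern bool skip_smp_alternatives;

	/* hypothetical suspend-side caller: take cpus down without letting
	 * the last offline trigger alternatives_smp_switch(0) repatching */
	static void example_disable_nonboot_cpus(void)
	{
		skip_smp_alternatives = true;
		/* ... offline all non-boot cpus here; the switch to UP
		 *     alternatives is skipped by the check in the hunk ... */
		skip_smp_alternatives = false;
	}
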
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 8f6463d8ed0d..affacb5e0065 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -12,95 +12,116 @@ | |||
12 | 12 | ||
13 | static u32 *flush_words; | 13 | static u32 *flush_words; |
14 | 14 | ||
15 | struct pci_device_id k8_nb_ids[] = { | 15 | struct pci_device_id amd_nb_misc_ids[] = { |
16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, | 18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, |
19 | {} | 19 | {} |
20 | }; | 20 | }; |
21 | EXPORT_SYMBOL(k8_nb_ids); | 21 | EXPORT_SYMBOL(amd_nb_misc_ids); |
22 | 22 | ||
23 | struct k8_northbridge_info k8_northbridges; | 23 | struct amd_northbridge_info amd_northbridges; |
24 | EXPORT_SYMBOL(k8_northbridges); | 24 | EXPORT_SYMBOL(amd_northbridges); |
25 | 25 | ||
26 | static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) | 26 | static struct pci_dev *next_northbridge(struct pci_dev *dev, |
27 | struct pci_device_id *ids) | ||
27 | { | 28 | { |
28 | do { | 29 | do { |
29 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | 30 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); |
30 | if (!dev) | 31 | if (!dev) |
31 | break; | 32 | break; |
32 | } while (!pci_match_id(&k8_nb_ids[0], dev)); | 33 | } while (!pci_match_id(ids, dev)); |
33 | return dev; | 34 | return dev; |
34 | } | 35 | } |
35 | 36 | ||
36 | int cache_k8_northbridges(void) | 37 | int amd_cache_northbridges(void) |
37 | { | 38 | { |
38 | int i; | 39 | int i = 0; |
39 | struct pci_dev *dev; | 40 | struct amd_northbridge *nb; |
41 | struct pci_dev *misc; | ||
40 | 42 | ||
41 | if (k8_northbridges.num) | 43 | if (amd_nb_num()) |
42 | return 0; | 44 | return 0; |
43 | 45 | ||
44 | dev = NULL; | 46 | misc = NULL; |
45 | while ((dev = next_k8_northbridge(dev)) != NULL) | 47 | while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) |
46 | k8_northbridges.num++; | 48 | i++; |
47 | 49 | ||
48 | /* some CPU families (e.g. family 0x11) do not support GART */ | 50 | if (i == 0) |
49 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || | 51 | return 0; |
50 | boot_cpu_data.x86 == 0x15) | ||
51 | k8_northbridges.gart_supported = 1; | ||
52 | 52 | ||
53 | k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * | 53 | nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); |
54 | sizeof(void *), GFP_KERNEL); | 54 | if (!nb) |
55 | if (!k8_northbridges.nb_misc) | ||
56 | return -ENOMEM; | 55 | return -ENOMEM; |
57 | 56 | ||
58 | if (!k8_northbridges.num) { | 57 | amd_northbridges.nb = nb; |
59 | k8_northbridges.nb_misc[0] = NULL; | 58 | amd_northbridges.num = i; |
60 | return 0; | ||
61 | } | ||
62 | 59 | ||
63 | if (k8_northbridges.gart_supported) { | 60 | misc = NULL; |
64 | flush_words = kmalloc(k8_northbridges.num * sizeof(u32), | 61 | for (i = 0; i != amd_nb_num(); i++) { |
65 | GFP_KERNEL); | 62 | node_to_amd_nb(i)->misc = misc = |
66 | if (!flush_words) { | 63 | next_northbridge(misc, amd_nb_misc_ids); |
67 | kfree(k8_northbridges.nb_misc); | 64 | } |
68 | return -ENOMEM; | 65 | |
69 | } | 66 | /* some CPU families (e.g. family 0x11) do not support GART */ |
70 | } | 67 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || |
68 | boot_cpu_data.x86 == 0x15) | ||
69 | amd_northbridges.flags |= AMD_NB_GART; | ||
70 | |||
71 | /* | ||
72 | * Some CPU families support L3 Cache Index Disable. There are some | ||
73 | * limitations because of E382 and E388 on family 0x10. | ||
74 | */ | ||
75 | if (boot_cpu_data.x86 == 0x10 && | ||
76 | boot_cpu_data.x86_model >= 0x8 && | ||
77 | (boot_cpu_data.x86_model > 0x9 || | ||
78 | boot_cpu_data.x86_mask >= 0x1)) | ||
79 | amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; | ||
71 | 80 | ||
72 | dev = NULL; | ||
73 | i = 0; | ||
74 | while ((dev = next_k8_northbridge(dev)) != NULL) { | ||
75 | k8_northbridges.nb_misc[i] = dev; | ||
76 | if (k8_northbridges.gart_supported) | ||
77 | pci_read_config_dword(dev, 0x9c, &flush_words[i++]); | ||
78 | } | ||
79 | k8_northbridges.nb_misc[i] = NULL; | ||
80 | return 0; | 81 | return 0; |
81 | } | 82 | } |
82 | EXPORT_SYMBOL_GPL(cache_k8_northbridges); | 83 | EXPORT_SYMBOL_GPL(amd_cache_northbridges); |
83 | 84 | ||
84 | /* Ignores subdevice/subvendor but as far as I can figure out | 85 | /* Ignores subdevice/subvendor but as far as I can figure out |
85 | they're useless anyway */ | 86 | they're useless anyway */ |
86 | int __init early_is_k8_nb(u32 device) | 87 | int __init early_is_amd_nb(u32 device) |
87 | { | 88 | { |
88 | struct pci_device_id *id; | 89 | struct pci_device_id *id; |
89 | u32 vendor = device & 0xffff; | 90 | u32 vendor = device & 0xffff; |
90 | device >>= 16; | 91 | device >>= 16; |
91 | for (id = k8_nb_ids; id->vendor; id++) | 92 | for (id = amd_nb_misc_ids; id->vendor; id++) |
92 | if (vendor == id->vendor && device == id->device) | 93 | if (vendor == id->vendor && device == id->device) |
93 | return 1; | 94 | return 1; |
94 | return 0; | 95 | return 0; |
95 | } | 96 | } |
96 | 97 | ||
97 | void k8_flush_garts(void) | 98 | int amd_cache_gart(void) |
99 | { | ||
100 | int i; | ||
101 | |||
102 | if (!amd_nb_has_feature(AMD_NB_GART)) | ||
103 | return 0; | ||
104 | |||
105 | flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); | ||
106 | if (!flush_words) { | ||
107 | amd_northbridges.flags &= ~AMD_NB_GART; | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | for (i = 0; i != amd_nb_num(); i++) | ||
112 | pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, | ||
113 | &flush_words[i]); | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | void amd_flush_garts(void) | ||
98 | { | 119 | { |
99 | int flushed, i; | 120 | int flushed, i; |
100 | unsigned long flags; | 121 | unsigned long flags; |
101 | static DEFINE_SPINLOCK(gart_lock); | 122 | static DEFINE_SPINLOCK(gart_lock); |
102 | 123 | ||
103 | if (!k8_northbridges.gart_supported) | 124 | if (!amd_nb_has_feature(AMD_NB_GART)) |
104 | return; | 125 | return; |
105 | 126 | ||
106 | /* Avoid races between AGP and IOMMU. In theory it's not needed | 127 | /* Avoid races between AGP and IOMMU. In theory it's not needed |
@@ -109,16 +130,16 @@ void k8_flush_garts(void) | |||
109 | that it doesn't matter to serialize more. -AK */ | 130 | that it doesn't matter to serialize more. -AK */ |
110 | spin_lock_irqsave(&gart_lock, flags); | 131 | spin_lock_irqsave(&gart_lock, flags); |
111 | flushed = 0; | 132 | flushed = 0; |
112 | for (i = 0; i < k8_northbridges.num; i++) { | 133 | for (i = 0; i < amd_nb_num(); i++) { |
113 | pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, | 134 | pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, |
114 | flush_words[i]|1); | 135 | flush_words[i] | 1); |
115 | flushed++; | 136 | flushed++; |
116 | } | 137 | } |
117 | for (i = 0; i < k8_northbridges.num; i++) { | 138 | for (i = 0; i < amd_nb_num(); i++) { |
118 | u32 w; | 139 | u32 w; |
119 | /* Make sure the hardware actually executed the flush */ | 140 | /* Make sure the hardware actually executed the flush */ |
120 | for (;;) { | 141 | for (;;) { |
121 | pci_read_config_dword(k8_northbridges.nb_misc[i], | 142 | pci_read_config_dword(node_to_amd_nb(i)->misc, |
122 | 0x9c, &w); | 143 | 0x9c, &w); |
123 | if (!(w & 1)) | 144 | if (!(w & 1)) |
124 | break; | 145 | break; |
@@ -129,19 +150,23 @@ void k8_flush_garts(void) | |||
129 | if (!flushed) | 150 | if (!flushed) |
130 | printk("nothing to flush?\n"); | 151 | printk("nothing to flush?\n"); |
131 | } | 152 | } |
132 | EXPORT_SYMBOL_GPL(k8_flush_garts); | 153 | EXPORT_SYMBOL_GPL(amd_flush_garts); |
133 | 154 | ||
134 | static __init int init_k8_nbs(void) | 155 | static __init int init_amd_nbs(void) |
135 | { | 156 | { |
136 | int err = 0; | 157 | int err = 0; |
137 | 158 | ||
138 | err = cache_k8_northbridges(); | 159 | err = amd_cache_northbridges(); |
139 | 160 | ||
140 | if (err < 0) | 161 | if (err < 0) |
141 | printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); | 162 | printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); |
163 | |||
164 | if (amd_cache_gart() < 0) | ||
165 | printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " | ||
166 | "GART support disabled.\n"); | ||
142 | 167 | ||
143 | return err; | 168 | return err; |
144 | } | 169 | } |
145 | 170 | ||
146 | /* This has to go after the PCI subsystem */ | 171 | /* This has to go after the PCI subsystem */ |
147 | fs_initcall(init_k8_nbs); | 172 | fs_initcall(init_amd_nbs); |
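
Annotation: the rename replaces the flat k8_northbridges struct with an amd_northbridges array plus accessors. A minimal sketch of the new interface, using only symbols this hunk exports (amd_cache_northbridges(), amd_nb_num(), node_to_amd_nb(), amd_nb_has_feature(), amd_flush_garts()); the header name and pr_info() bodies are assumptions for illustration:

	#include <asm/amd_nb.h>		/* assumed new header name in this series */

	static void example_walk_northbridges(void)
	{
		int i;

		if (amd_cache_northbridges() < 0)
			return;			/* allocation failed */

		/* amd_nb_num() is 0 on non-AMD systems; loop simply skips */
		for (i = 0; i != amd_nb_num(); i++)
			pr_info("NB %d: misc PCI dev %p\n",
				i, node_to_amd_nb(i)->misc);

		if (amd_nb_has_feature(AMD_NB_GART))
			amd_flush_garts();	/* valid once amd_cache_gart()
						 * has filled flush_words */
	}
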
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 92543c73cf8e..7c9ab59653e8 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev) | |||
315 | 315 | ||
316 | if (system_state == SYSTEM_BOOTING) { | 316 | if (system_state == SYSTEM_BOOTING) { |
317 | irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); | 317 | irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); |
318 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
318 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ | 319 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ |
319 | __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); | 320 | __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); |
320 | if (request_irq(adev->irq, apbt_interrupt_handler, | 321 | if (request_irq(adev->irq, apbt_interrupt_handler, |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index b3a16e8f0703..dcd7c83e1659 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) | |||
206 | * Do a PCI bus scan by hand because we're running before the PCI | 206 | * Do a PCI bus scan by hand because we're running before the PCI |
207 | * subsystem. | 207 | * subsystem. |
208 | * | 208 | * |
209 | * All K8 AGP bridges are AGPv3 compliant, so we can do this scan | 209 | * All AMD AGP bridges are AGPv3 compliant, so we can do this scan |
210 | * generically. It's probably overkill to always scan all slots because | 210 | * generically. It's probably overkill to always scan all slots because |
211 | * the AGP bridges should be always an own bus on the HT hierarchy, | 211 | * the AGP bridges should be always an own bus on the HT hierarchy, |
212 | * but do it here for future safety. | 212 | * but do it here for future safety. |
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void) | |||
303 | dev_limit = bus_dev_ranges[i].dev_limit; | 303 | dev_limit = bus_dev_ranges[i].dev_limit; |
304 | 304 | ||
305 | for (slot = dev_base; slot < dev_limit; slot++) { | 305 | for (slot = dev_base; slot < dev_limit; slot++) { |
306 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 306 | if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) |
307 | continue; | 307 | continue; |
308 | 308 | ||
309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void) | |||
358 | dev_limit = bus_dev_ranges[i].dev_limit; | 358 | dev_limit = bus_dev_ranges[i].dev_limit; |
359 | 359 | ||
360 | for (slot = dev_base; slot < dev_limit; slot++) { | 360 | for (slot = dev_base; slot < dev_limit; slot++) { |
361 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 361 | if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) |
362 | continue; | 362 | continue; |
363 | 363 | ||
364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void) | |||
400 | dev_limit = bus_dev_ranges[i].dev_limit; | 400 | dev_limit = bus_dev_ranges[i].dev_limit; |
401 | 401 | ||
402 | for (slot = dev_base; slot < dev_limit; slot++) { | 402 | for (slot = dev_base; slot < dev_limit; slot++) { |
403 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 403 | if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) |
404 | continue; | 404 | continue; |
405 | 405 | ||
406 | iommu_detected = 1; | 406 | iommu_detected = 1; |
@@ -518,7 +518,7 @@ out: | |||
518 | dev_base = bus_dev_ranges[i].dev_base; | 518 | dev_base = bus_dev_ranges[i].dev_base; |
519 | dev_limit = bus_dev_ranges[i].dev_limit; | 519 | dev_limit = bus_dev_ranges[i].dev_limit; |
520 | for (slot = dev_base; slot < dev_limit; slot++) { | 520 | for (slot = dev_base; slot < dev_limit; slot++) { |
521 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 521 | if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) |
522 | continue; | 522 | continue; |
523 | 523 | ||
524 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); | 524 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fb7657822aad..879999a5230f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -431,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) | |||
431 | reserved = reserve_eilvt_offset(offset, new); | 431 | reserved = reserve_eilvt_offset(offset, new); |
432 | 432 | ||
433 | if (reserved != new) { | 433 | if (reserved != new) { |
434 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " | 434 | pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " |
435 | "vector 0x%x was already reserved by another core, " | 435 | "vector 0x%x, but the register is already in use for " |
436 | "APIC%lX=0x%x\n", | 436 | "vector 0x%x on another cpu\n", |
437 | smp_processor_id(), new, reserved, reg, old); | 437 | smp_processor_id(), reg, offset, new, reserved); |
438 | return -EINVAL; | 438 | return -EINVAL; |
439 | } | 439 | } |
440 | 440 | ||
441 | if (!eilvt_entry_is_changeable(old, new)) { | 441 | if (!eilvt_entry_is_changeable(old, new)) { |
442 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " | 442 | pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " |
443 | "register already in use, APIC%lX=0x%x\n", | 443 | "vector 0x%x, but the register is already in use for " |
444 | smp_processor_id(), new, reg, old); | 444 | "vector 0x%x on this cpu\n", |
445 | smp_processor_id(), reg, offset, new, old); | ||
445 | return -EBUSY; | 446 | return -EBUSY; |
446 | } | 447 | } |
447 | 448 | ||
@@ -1532,13 +1533,60 @@ static int __init detect_init_APIC(void) | |||
1532 | return 0; | 1533 | return 0; |
1533 | } | 1534 | } |
1534 | #else | 1535 | #else |
1536 | |||
1537 | static int apic_verify(void) | ||
1538 | { | ||
1539 | u32 features, h, l; | ||
1540 | |||
1541 | /* | ||
1542 | * The APIC feature bit should now be enabled | ||
1543 | * in `cpuid' | ||
1544 | */ | ||
1545 | features = cpuid_edx(1); | ||
1546 | if (!(features & (1 << X86_FEATURE_APIC))) { | ||
1547 | pr_warning("Could not enable APIC!\n"); | ||
1548 | return -1; | ||
1549 | } | ||
1550 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1551 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
1552 | |||
1553 | /* The BIOS may have set up the APIC at some other address */ | ||
1554 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1555 | if (l & MSR_IA32_APICBASE_ENABLE) | ||
1556 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | ||
1557 | |||
1558 | pr_info("Found and enabled local APIC!\n"); | ||
1559 | return 0; | ||
1560 | } | ||
1561 | |||
1562 | int apic_force_enable(void) | ||
1563 | { | ||
1564 | u32 h, l; | ||
1565 | |||
1566 | if (disable_apic) | ||
1567 | return -1; | ||
1568 | |||
1569 | /* | ||
1570 | * Some BIOSes disable the local APIC in the APIC_BASE | ||
1571 | * MSR. This can only be done in software for Intel P6 or later | ||
1572 | * and AMD K7 (Model > 1) or later. | ||
1573 | */ | ||
1574 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1575 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | ||
1576 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); | ||
1577 | l &= ~MSR_IA32_APICBASE_BASE; | ||
1578 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | ||
1579 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
1580 | enabled_via_apicbase = 1; | ||
1581 | } | ||
1582 | return apic_verify(); | ||
1583 | } | ||
1584 | |||
1535 | /* | 1585 | /* |
1536 | * Detect and initialize APIC | 1586 | * Detect and initialize APIC |
1537 | */ | 1587 | */ |
1538 | static int __init detect_init_APIC(void) | 1588 | static int __init detect_init_APIC(void) |
1539 | { | 1589 | { |
1540 | u32 h, l, features; | ||
1541 | |||
1542 | /* Disabled by kernel option? */ | 1590 | /* Disabled by kernel option? */ |
1543 | if (disable_apic) | 1591 | if (disable_apic) |
1544 | return -1; | 1592 | return -1; |
@@ -1568,38 +1616,12 @@ static int __init detect_init_APIC(void) | |||
1568 | "you can enable it with \"lapic\"\n"); | 1616 | "you can enable it with \"lapic\"\n"); |
1569 | return -1; | 1617 | return -1; |
1570 | } | 1618 | } |
1571 | /* | 1619 | if (apic_force_enable()) |
1572 | * Some BIOSes disable the local APIC in the APIC_BASE | 1620 | return -1; |
1573 | * MSR. This can only be done in software for Intel P6 or later | 1621 | } else { |
1574 | * and AMD K7 (Model > 1) or later. | 1622 | if (apic_verify()) |
1575 | */ | 1623 | return -1; |
1576 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1577 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | ||
1578 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); | ||
1579 | l &= ~MSR_IA32_APICBASE_BASE; | ||
1580 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | ||
1581 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
1582 | enabled_via_apicbase = 1; | ||
1583 | } | ||
1584 | } | ||
1585 | /* | ||
1586 | * The APIC feature bit should now be enabled | ||
1587 | * in `cpuid' | ||
1588 | */ | ||
1589 | features = cpuid_edx(1); | ||
1590 | if (!(features & (1 << X86_FEATURE_APIC))) { | ||
1591 | pr_warning("Could not enable APIC!\n"); | ||
1592 | return -1; | ||
1593 | } | 1624 | } |
1594 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1595 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
1596 | |||
1597 | /* The BIOS may have set up the APIC at some other address */ | ||
1598 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1599 | if (l & MSR_IA32_APICBASE_ENABLE) | ||
1600 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | ||
1601 | |||
1602 | pr_info("Found and enabled local APIC!\n"); | ||
1603 | 1625 | ||
1604 | apic_pm_activate(); | 1626 | apic_pm_activate(); |
1605 | 1627 | ||
@@ -1687,7 +1709,7 @@ void __init init_apic_mappings(void) | |||
1687 | * This initializes the IO-APIC and APIC hardware if this is | 1709 | * This initializes the IO-APIC and APIC hardware if this is |
1688 | * a UP kernel. | 1710 | * a UP kernel. |
1689 | */ | 1711 | */ |
1690 | int apic_version[MAX_APICS]; | 1712 | int apic_version[MAX_LOCAL_APIC]; |
1691 | 1713 | ||
1692 | int __init APIC_init_uniprocessor(void) | 1714 | int __init APIC_init_uniprocessor(void) |
1693 | { | 1715 | { |
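
Annotation: the refactoring splits detect_init_APIC() so the enable-and-verify sequence becomes reusable — apic_verify() re-reads cpuid and the APIC base MSR, while apic_force_enable() first re-enables a BIOS-disabled APIC through IA32_APIC_BASE and then verifies. A hedged fragment of how later platform code could use the new entry point (calling context invented for illustration):

	/* sketch only: platform knows an integrated APIC exists even though
	 * the BIOS cleared the enable bit in IA32_APIC_BASE */
	static int __init example_platform_apic_setup(void)
	{
		if (apic_force_enable())
			return -ENODEV;	/* MSR re-enable or cpuid verify failed */

		/* from here mp_lapic_addr is valid and X86_FEATURE_APIC is set */
		return 0;
	}
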
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 16c2db8750a2..f6cd5b410770 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1933,8 +1933,7 @@ void disable_IO_APIC(void) | |||
1933 | * | 1933 | * |
1934 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | 1934 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
1935 | */ | 1935 | */ |
1936 | 1936 | void __init setup_ioapic_ids_from_mpc_nocheck(void) | |
1937 | void __init setup_ioapic_ids_from_mpc(void) | ||
1938 | { | 1937 | { |
1939 | union IO_APIC_reg_00 reg_00; | 1938 | union IO_APIC_reg_00 reg_00; |
1940 | physid_mask_t phys_id_present_map; | 1939 | physid_mask_t phys_id_present_map; |
@@ -1943,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
1943 | unsigned char old_id; | 1942 | unsigned char old_id; |
1944 | unsigned long flags; | 1943 | unsigned long flags; |
1945 | 1944 | ||
1946 | if (acpi_ioapic) | ||
1947 | return; | ||
1948 | /* | ||
1949 | * Don't check I/O APIC IDs for xAPIC systems. They have | ||
1950 | * no meaning without the serial APIC bus. | ||
1951 | */ | ||
1952 | if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
1953 | || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
1954 | return; | ||
1955 | /* | 1945 | /* |
1956 | * This is broken; anything with a real cpu count has to | 1946 | * This is broken; anything with a real cpu count has to |
1957 | * circumvent this idiocy regardless. | 1947 | * circumvent this idiocy regardless. |
@@ -2005,7 +1995,6 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2005 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 1995 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
2006 | } | 1996 | } |
2007 | 1997 | ||
2008 | |||
2009 | /* | 1998 | /* |
2010 | * We need to adjust the IRQ routing table | 1999 | * We need to adjust the IRQ routing table |
2011 | * if the ID changed. | 2000 | * if the ID changed. |
@@ -2041,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2041 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2030 | apic_printk(APIC_VERBOSE, " ok.\n"); |
2042 | } | 2031 | } |
2043 | } | 2032 | } |
2033 | |||
2034 | void __init setup_ioapic_ids_from_mpc(void) | ||
2035 | { | ||
2036 | |||
2037 | if (acpi_ioapic) | ||
2038 | return; | ||
2039 | /* | ||
2040 | * Don't check I/O APIC IDs for xAPIC systems. They have | ||
2041 | * no meaning without the serial APIC bus. | ||
2042 | */ | ||
2043 | if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
2044 | || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
2045 | return; | ||
2046 | setup_ioapic_ids_from_mpc_nocheck(); | ||
2047 | } | ||
2044 | #endif | 2048 | #endif |
2045 | 2049 | ||
2046 | int no_timer_check __initdata; | 2050 | int no_timer_check __initdata; |
@@ -3593,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3593 | return reg_01.bits.entries + 1; | 3597 | return reg_01.bits.entries + 1; |
3594 | } | 3598 | } |
3595 | 3599 | ||
3596 | void __init probe_nr_irqs_gsi(void) | 3600 | static void __init probe_nr_irqs_gsi(void) |
3597 | { | 3601 | { |
3598 | int nr; | 3602 | int nr; |
3599 | 3603 | ||
@@ -3910,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) | |||
3910 | return res; | 3914 | return res; |
3911 | } | 3915 | } |
3912 | 3916 | ||
3913 | void __init ioapic_init_mappings(void) | 3917 | void __init ioapic_and_gsi_init(void) |
3914 | { | 3918 | { |
3915 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | 3919 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; |
3916 | struct resource *ioapic_res; | 3920 | struct resource *ioapic_res; |
@@ -3948,6 +3952,8 @@ fake_ioapic_page: | |||
3948 | ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; | 3952 | ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; |
3949 | ioapic_res++; | 3953 | ioapic_res++; |
3950 | } | 3954 | } |
3955 | |||
3956 | probe_nr_irqs_gsi(); | ||
3951 | } | 3957 | } |
3952 | 3958 | ||
3953 | void __init ioapic_insert_resources(void) | 3959 | void __init ioapic_insert_resources(void) |
@@ -4057,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void) | |||
4057 | 4063 | ||
4058 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | 4064 | printk(KERN_INFO "Early APIC setup for system timer0\n"); |
4059 | #ifndef CONFIG_SMP | 4065 | #ifndef CONFIG_SMP |
4060 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 4066 | physid_set_mask_of_physid(boot_cpu_physical_apicid, |
4067 | &phys_cpu_present_map); | ||
4061 | #endif | 4068 | #endif |
4062 | /* Make sure the irq descriptor is set up */ | 4069 | /* Make sure the irq descriptor is set up */ |
4063 | cfg = alloc_irq_and_cfg_at(0, 0); | 4070 | cfg = alloc_irq_and_cfg_at(0, 0); |
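
Annotation: setup_ioapic_ids_from_mpc() is split so the acpi/xAPIC bail-outs stay in the wrapper while the actual ID fixup moves to setup_ioapic_ids_from_mpc_nocheck(). Platform code that must rewrite IO-APIC IDs regardless of those checks (the CE4100 setup introduced elsewhere in this series is a plausible caller) can then invoke the worker directly, as in this sketch:

	/* hedged sketch: a platform init path that needs the ID fixup even
	 * on hardware the wrapper's vendor/xAPIC checks would skip */
	static void __init example_platform_ioapic_init(void)
	{
		setup_ioapic_ids_from_mpc_nocheck();
	}
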
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 927902d90fe6..936613e77113 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits; | |||
48 | EXPORT_SYMBOL_GPL(uv_apicid_hibits); | 48 | EXPORT_SYMBOL_GPL(uv_apicid_hibits); |
49 | static DEFINE_SPINLOCK(uv_nmi_lock); | 49 | static DEFINE_SPINLOCK(uv_nmi_lock); |
50 | 50 | ||
51 | static unsigned long __init uv_early_read_mmr(unsigned long addr) | ||
52 | { | ||
53 | unsigned long val, *mmr; | ||
54 | |||
55 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); | ||
56 | val = *mmr; | ||
57 | early_iounmap(mmr, sizeof(*mmr)); | ||
58 | return val; | ||
59 | } | ||
60 | |||
51 | static inline bool is_GRU_range(u64 start, u64 end) | 61 | static inline bool is_GRU_range(u64 start, u64 end) |
52 | { | 62 | { |
53 | return start >= gru_start_paddr && end <= gru_end_paddr; | 63 | return start >= gru_start_paddr && end <= gru_end_paddr; |
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) | |||
58 | return is_ISA_range(start, end) || is_GRU_range(start, end); | 68 | return is_ISA_range(start, end) || is_GRU_range(start, end); |
59 | } | 69 | } |
60 | 70 | ||
61 | static int early_get_nodeid(void) | 71 | static int __init early_get_pnodeid(void) |
62 | { | 72 | { |
63 | union uvh_node_id_u node_id; | 73 | union uvh_node_id_u node_id; |
64 | unsigned long *mmr; | 74 | union uvh_rh_gam_config_mmr_u m_n_config; |
65 | 75 | int pnode; | |
66 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); | ||
67 | node_id.v = *mmr; | ||
68 | early_iounmap(mmr, sizeof(*mmr)); | ||
69 | 76 | ||
70 | /* Currently, all blades have same revision number */ | 77 | /* Currently, all blades have same revision number */ |
78 | node_id.v = uv_early_read_mmr(UVH_NODE_ID); | ||
79 | m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); | ||
71 | uv_min_hub_revision_id = node_id.s.revision; | 80 | uv_min_hub_revision_id = node_id.s.revision; |
72 | 81 | ||
73 | return node_id.s.node_id; | 82 | pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); |
83 | return pnode; | ||
74 | } | 84 | } |
75 | 85 | ||
76 | static void __init early_get_apic_pnode_shift(void) | 86 | static void __init early_get_apic_pnode_shift(void) |
77 | { | 87 | { |
78 | unsigned long *mmr; | 88 | uvh_apicid.v = uv_early_read_mmr(UVH_APICID); |
79 | |||
80 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); | ||
81 | uvh_apicid.v = *mmr; | ||
82 | early_iounmap(mmr, sizeof(*mmr)); | ||
83 | if (!uvh_apicid.v) | 89 | if (!uvh_apicid.v) |
84 | /* | 90 | /* |
85 | * Old bios, use default value | 91 | * Old bios, use default value |
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void) | |||
95 | static void __init uv_set_apicid_hibit(void) | 101 | static void __init uv_set_apicid_hibit(void) |
96 | { | 102 | { |
97 | union uvh_lb_target_physical_apic_id_mask_u apicid_mask; | 103 | union uvh_lb_target_physical_apic_id_mask_u apicid_mask; |
98 | unsigned long *mmr; | ||
99 | 104 | ||
100 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | | 105 | apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); |
101 | UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); | ||
102 | apicid_mask.v = *mmr; | ||
103 | early_iounmap(mmr, sizeof(*mmr)); | ||
104 | uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; | 106 | uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; |
105 | } | 107 | } |
106 | 108 | ||
107 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 109 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
108 | { | 110 | { |
109 | int nodeid; | 111 | int pnodeid; |
110 | 112 | ||
111 | if (!strcmp(oem_id, "SGI")) { | 113 | if (!strcmp(oem_id, "SGI")) { |
112 | nodeid = early_get_nodeid(); | 114 | pnodeid = early_get_pnodeid(); |
113 | early_get_apic_pnode_shift(); | 115 | early_get_apic_pnode_shift(); |
114 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | 116 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; |
115 | x86_platform.nmi_init = uv_nmi_init; | 117 | x86_platform.nmi_init = uv_nmi_init; |
@@ -119,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
119 | uv_system_type = UV_X2APIC; | 121 | uv_system_type = UV_X2APIC; |
120 | else if (!strcmp(oem_table_id, "UVH")) { | 122 | else if (!strcmp(oem_table_id, "UVH")) { |
121 | __get_cpu_var(x2apic_extra_bits) = | 123 | __get_cpu_var(x2apic_extra_bits) = |
122 | nodeid << (uvh_apicid.s.pnode_shift - 1); | 124 | pnodeid << uvh_apicid.s.pnode_shift; |
123 | uv_system_type = UV_NON_UNIQUE_APIC; | 125 | uv_system_type = UV_NON_UNIQUE_APIC; |
124 | uv_set_apicid_hibit(); | 126 | uv_set_apicid_hibit(); |
125 | return 1; | 127 | return 1; |
@@ -682,27 +684,32 @@ void uv_nmi_init(void) | |||
682 | void __init uv_system_init(void) | 684 | void __init uv_system_init(void) |
683 | { | 685 | { |
684 | union uvh_rh_gam_config_mmr_u m_n_config; | 686 | union uvh_rh_gam_config_mmr_u m_n_config; |
687 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | ||
685 | union uvh_node_id_u node_id; | 688 | union uvh_node_id_u node_id; |
686 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; | 689 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; |
687 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; | 690 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; |
688 | int gnode_extra, max_pnode = 0; | 691 | int gnode_extra, max_pnode = 0; |
689 | unsigned long mmr_base, present, paddr; | 692 | unsigned long mmr_base, present, paddr; |
690 | unsigned short pnode_mask; | 693 | unsigned short pnode_mask, pnode_io_mask; |
691 | 694 | ||
692 | map_low_mmrs(); | 695 | map_low_mmrs(); |
693 | 696 | ||
694 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); | 697 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); |
695 | m_val = m_n_config.s.m_skt; | 698 | m_val = m_n_config.s.m_skt; |
696 | n_val = m_n_config.s.n_skt; | 699 | n_val = m_n_config.s.n_skt; |
700 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | ||
701 | n_io = mmioh.s.n_io; | ||
697 | mmr_base = | 702 | mmr_base = |
698 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & | 703 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & |
699 | ~UV_MMR_ENABLE; | 704 | ~UV_MMR_ENABLE; |
700 | pnode_mask = (1 << n_val) - 1; | 705 | pnode_mask = (1 << n_val) - 1; |
706 | pnode_io_mask = (1 << n_io) - 1; | ||
707 | |||
701 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); | 708 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); |
702 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; | 709 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; |
703 | gnode_upper = ((unsigned long)gnode_extra << m_val); | 710 | gnode_upper = ((unsigned long)gnode_extra << m_val); |
704 | printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", | 711 | printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", |
705 | n_val, m_val, gnode_upper, gnode_extra); | 712 | n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); |
706 | 713 | ||
707 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); | 714 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); |
708 | 715 | ||
@@ -735,7 +742,7 @@ void __init uv_system_init(void) | |||
735 | for (j = 0; j < 64; j++) { | 742 | for (j = 0; j < 64; j++) { |
736 | if (!test_bit(j, &present)) | 743 | if (!test_bit(j, &present)) |
737 | continue; | 744 | continue; |
738 | pnode = (i * 64 + j); | 745 | pnode = (i * 64 + j) & pnode_mask; |
739 | uv_blade_info[blade].pnode = pnode; | 746 | uv_blade_info[blade].pnode = pnode; |
740 | uv_blade_info[blade].nr_possible_cpus = 0; | 747 | uv_blade_info[blade].nr_possible_cpus = 0; |
741 | uv_blade_info[blade].nr_online_cpus = 0; | 748 | uv_blade_info[blade].nr_online_cpus = 0; |
@@ -756,6 +763,7 @@ void __init uv_system_init(void) | |||
756 | /* | 763 | /* |
757 | * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); | 764 | * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); |
758 | */ | 765 | */ |
766 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | ||
759 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; | 767 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; |
760 | pnode = uv_apicid_to_pnode(apicid); | 768 | pnode = uv_apicid_to_pnode(apicid); |
761 | blade = boot_pnode_to_blade(pnode); | 769 | blade = boot_pnode_to_blade(pnode); |
@@ -772,7 +780,6 @@ void __init uv_system_init(void) | |||
772 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 780 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
773 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; | 781 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; |
774 | uv_cpu_hub_info(cpu)->pnode = pnode; | 782 | uv_cpu_hub_info(cpu)->pnode = pnode; |
775 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | ||
776 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; | 783 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; |
777 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 784 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
778 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | 785 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; |
@@ -796,7 +803,7 @@ void __init uv_system_init(void) | |||
796 | 803 | ||
797 | map_gru_high(max_pnode); | 804 | map_gru_high(max_pnode); |
798 | map_mmr_high(max_pnode); | 805 | map_mmr_high(max_pnode); |
799 | map_mmioh_high(max_pnode); | 806 | map_mmioh_high(max_pnode & pnode_io_mask); |
800 | 807 | ||
801 | uv_cpu_init(); | 808 | uv_cpu_init(); |
802 | uv_scir_register_cpu_notifier(); | 809 | uv_scir_register_cpu_notifier(); |
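
Annotation: the early_get_pnodeid() change is worth tracing — instead of returning the raw node_id, it shifts out the low bit and masks to the n_skt-wide node field read from UVH_RH_GAM_CONFIG_MMR. A standalone arithmetic sketch (register values are made up for illustration):

	#include <stdio.h>

	int main(void)
	{
		unsigned long node_id = 0x1a;	/* hypothetical UVH_NODE_ID.node_id */
		int n_skt = 4;			/* hypothetical m_n_config.s.n_skt  */
		int pnode = (node_id >> 1) & ((1 << n_skt) - 1);

		printf("pnode = 0x%x\n", pnode);	/* (0x1a >> 1) & 0xf = 0xd */
		return 0;
	}
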
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 17ad03366211..9ecf81f9b90f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx { | |||
149 | }; | 149 | }; |
150 | 150 | ||
151 | struct amd_l3_cache { | 151 | struct amd_l3_cache { |
152 | struct pci_dev *dev; | 152 | struct amd_northbridge *nb; |
153 | bool can_disable; | ||
154 | unsigned indices; | 153 | unsigned indices; |
155 | u8 subcaches[4]; | 154 | u8 subcaches[4]; |
156 | }; | 155 | }; |
@@ -311,14 +310,12 @@ struct _cache_attr { | |||
311 | /* | 310 | /* |
312 | * L3 cache descriptors | 311 | * L3 cache descriptors |
313 | */ | 312 | */ |
314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
315 | |||
316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | 313 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) |
317 | { | 314 | { |
318 | unsigned int sc0, sc1, sc2, sc3; | 315 | unsigned int sc0, sc1, sc2, sc3; |
319 | u32 val = 0; | 316 | u32 val = 0; |
320 | 317 | ||
321 | pci_read_config_dword(l3->dev, 0x1C4, &val); | 318 | pci_read_config_dword(l3->nb->misc, 0x1C4, &val); |
322 | 319 | ||
323 | /* calculate subcache sizes */ | 320 | /* calculate subcache sizes */ |
324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); | 321 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | |||
330 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; | 327 | l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; |
331 | } | 328 | } |
332 | 329 | ||
333 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | 330 | static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, |
334 | { | 331 | int index) |
335 | struct amd_l3_cache *l3; | ||
336 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
337 | |||
338 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
339 | if (!l3) { | ||
340 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
341 | return NULL; | ||
342 | } | ||
343 | |||
344 | l3->dev = dev; | ||
345 | |||
346 | amd_calc_l3_indices(l3); | ||
347 | |||
348 | return l3; | ||
349 | } | ||
350 | |||
351 | static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | ||
352 | int index) | ||
353 | { | 332 | { |
333 | static struct amd_l3_cache *__cpuinitdata l3_caches; | ||
354 | int node; | 334 | int node; |
355 | 335 | ||
356 | if (boot_cpu_data.x86 != 0x10) | 336 | /* only for L3, and not in virtualized environments */ |
357 | return; | 337 | if (index < 3 || amd_nb_num() == 0) |
358 | |||
359 | if (index < 3) | ||
360 | return; | ||
361 | |||
362 | /* see errata #382 and #388 */ | ||
363 | if (boot_cpu_data.x86_model < 0x8) | ||
364 | return; | ||
365 | |||
366 | if ((boot_cpu_data.x86_model == 0x8 || | ||
367 | boot_cpu_data.x86_model == 0x9) | ||
368 | && | ||
369 | boot_cpu_data.x86_mask < 0x1) | ||
370 | return; | ||
371 | |||
372 | /* not in virtualized environments */ | ||
373 | if (k8_northbridges.num == 0) | ||
374 | return; | 338 | return; |
375 | 339 | ||
376 | /* | 340 | /* |
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
378 | * never freed but this is done only on shutdown so it doesn't matter. | 342 | * never freed but this is done only on shutdown so it doesn't matter. |
379 | */ | 343 | */ |
380 | if (!l3_caches) { | 344 | if (!l3_caches) { |
381 | int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); | 345 | int size = amd_nb_num() * sizeof(struct amd_l3_cache); |
382 | 346 | ||
383 | l3_caches = kzalloc(size, GFP_ATOMIC); | 347 | l3_caches = kzalloc(size, GFP_ATOMIC); |
384 | if (!l3_caches) | 348 | if (!l3_caches) |
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
387 | 351 | ||
388 | node = amd_get_nb_id(smp_processor_id()); | 352 | node = amd_get_nb_id(smp_processor_id()); |
389 | 353 | ||
390 | if (!l3_caches[node]) { | 354 | if (!l3_caches[node].nb) { |
391 | l3_caches[node] = amd_init_l3_cache(node); | 355 | l3_caches[node].nb = node_to_amd_nb(node); |
392 | l3_caches[node]->can_disable = true; | 356 | amd_calc_l3_indices(&l3_caches[node]); |
393 | } | 357 | } |
394 | 358 | ||
395 | WARN_ON(!l3_caches[node]); | 359 | this_leaf->l3 = &l3_caches[node]; |
396 | |||
397 | this_leaf->l3 = l3_caches[node]; | ||
398 | } | 360 | } |
399 | 361 | ||
400 | /* | 362 | /* |
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | |||
408 | { | 370 | { |
409 | unsigned int reg = 0; | 371 | unsigned int reg = 0; |
410 | 372 | ||
411 | pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); | 373 | pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, ®); |
412 | 374 | ||
413 | /* check whether this slot is activated already */ | 375 | /* check whether this slot is activated already */ |
414 | if (reg & (3UL << 30)) | 376 | if (reg & (3UL << 30)) |
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | |||
422 | { | 384 | { |
423 | int index; | 385 | int index; |
424 | 386 | ||
425 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 387 | if (!this_leaf->l3 || |
388 | !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
426 | return -EINVAL; | 389 | return -EINVAL; |
427 | 390 | ||
428 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); | 391 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); |
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
457 | if (!l3->subcaches[i]) | 420 | if (!l3->subcaches[i]) |
458 | continue; | 421 | continue; |
459 | 422 | ||
460 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | 423 | pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); |
461 | 424 | ||
462 | /* | 425 | /* |
463 | * We need to WBINVD on a core on the node containing the L3 | 426 | * We need to WBINVD on a core on the node containing the L3 |
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
467 | wbinvd_on_cpu(cpu); | 430 | wbinvd_on_cpu(cpu); |
468 | 431 | ||
469 | reg |= BIT(31); | 432 | reg |= BIT(31); |
470 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | 433 | pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); |
471 | } | 434 | } |
472 | } | 435 | } |
473 | 436 | ||
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
524 | if (!capable(CAP_SYS_ADMIN)) | 487 | if (!capable(CAP_SYS_ADMIN)) |
525 | return -EPERM; | 488 | return -EPERM; |
526 | 489 | ||
527 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | 490 | if (!this_leaf->l3 || |
491 | !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
528 | return -EINVAL; | 492 | return -EINVAL; |
529 | 493 | ||
530 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 494 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
545 | #define STORE_CACHE_DISABLE(slot) \ | 509 | #define STORE_CACHE_DISABLE(slot) \ |
546 | static ssize_t \ | 510 | static ssize_t \ |
547 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ | 511 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
548 | const char *buf, size_t count) \ | 512 | const char *buf, size_t count) \ |
549 | { \ | 513 | { \ |
550 | return store_cache_disable(this_leaf, buf, count, slot); \ | 514 | return store_cache_disable(this_leaf, buf, count, slot); \ |
551 | } | 515 | } |
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | |||
558 | show_cache_disable_1, store_cache_disable_1); | 522 | show_cache_disable_1, store_cache_disable_1); |
559 | 523 | ||
560 | #else /* CONFIG_AMD_NB */ | 524 | #else /* CONFIG_AMD_NB */ |
561 | static void __cpuinit | 525 | #define amd_init_l3_cache(x, y) |
562 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) | ||
563 | { | ||
564 | }; | ||
565 | #endif /* CONFIG_AMD_NB */ | 526 | #endif /* CONFIG_AMD_NB */ |
566 | 527 | ||
567 | static int | 528 | static int |
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
575 | 536 | ||
576 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 537 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
577 | amd_cpuid4(index, &eax, &ebx, &ecx); | 538 | amd_cpuid4(index, &eax, &ebx, &ecx); |
578 | amd_check_l3_disable(this_leaf, index); | 539 | amd_init_l3_cache(this_leaf, index); |
579 | } else { | 540 | } else { |
580 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 541 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
581 | } | 542 | } |
@@ -983,30 +944,48 @@ define_one_ro(size); | |||
983 | define_one_ro(shared_cpu_map); | 944 | define_one_ro(shared_cpu_map); |
984 | define_one_ro(shared_cpu_list); | 945 | define_one_ro(shared_cpu_list); |
985 | 946 | ||
986 | #define DEFAULT_SYSFS_CACHE_ATTRS \ | ||
987 | &type.attr, \ | ||
988 | &level.attr, \ | ||
989 | &coherency_line_size.attr, \ | ||
990 | &physical_line_partition.attr, \ | ||
991 | &ways_of_associativity.attr, \ | ||
992 | &number_of_sets.attr, \ | ||
993 | &size.attr, \ | ||
994 | &shared_cpu_map.attr, \ | ||
995 | &shared_cpu_list.attr | ||
996 | |||
997 | static struct attribute *default_attrs[] = { | 947 | static struct attribute *default_attrs[] = { |
998 | DEFAULT_SYSFS_CACHE_ATTRS, | 948 | &type.attr, |
949 | &level.attr, | ||
950 | &coherency_line_size.attr, | ||
951 | &physical_line_partition.attr, | ||
952 | &ways_of_associativity.attr, | ||
953 | &number_of_sets.attr, | ||
954 | &size.attr, | ||
955 | &shared_cpu_map.attr, | ||
956 | &shared_cpu_list.attr, | ||
999 | NULL | 957 | NULL |
1000 | }; | 958 | }; |
1001 | 959 | ||
1002 | static struct attribute *default_l3_attrs[] = { | ||
1003 | DEFAULT_SYSFS_CACHE_ATTRS, | ||
1004 | #ifdef CONFIG_AMD_NB | 960 | #ifdef CONFIG_AMD_NB |
1005 | &cache_disable_0.attr, | 961 | static struct attribute ** __cpuinit amd_l3_attrs(void) |
1006 | &cache_disable_1.attr, | 962 | { |
963 | static struct attribute **attrs; | ||
964 | int n; | ||
965 | |||
966 | if (attrs) | ||
967 | return attrs; | ||
968 | |||
969 | n = sizeof (default_attrs) / sizeof (struct attribute *); | ||
970 | |||
971 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
972 | n += 2; | ||
973 | |||
974 | attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); | ||
975 | if (attrs == NULL) | ||
976 | return attrs = default_attrs; | ||
977 | |||
978 | for (n = 0; default_attrs[n]; n++) | ||
979 | attrs[n] = default_attrs[n]; | ||
980 | |||
981 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { | ||
982 | attrs[n++] = &cache_disable_0.attr; | ||
983 | attrs[n++] = &cache_disable_1.attr; | ||
984 | } | ||
985 | |||
986 | return attrs; | ||
987 | } | ||
1007 | #endif | 988 | #endif |
1008 | NULL | ||
1009 | }; | ||
1010 | 989 | ||
1011 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 990 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
1012 | { | 991 | { |
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1117 | 1096 | ||
1118 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1097 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
1119 | 1098 | ||
1120 | if (this_leaf->l3 && this_leaf->l3->can_disable) | 1099 | ktype_cache.default_attrs = default_attrs; |
1121 | ktype_cache.default_attrs = default_l3_attrs; | 1100 | #ifdef CONFIG_AMD_NB |
1122 | else | 1101 | if (this_leaf->l3) |
1123 | ktype_cache.default_attrs = default_attrs; | 1102 | ktype_cache.default_attrs = amd_l3_attrs(); |
1124 | 1103 | #endif | |
1125 | retval = kobject_init_and_add(&(this_object->kobj), | 1104 | retval = kobject_init_and_add(&(this_object->kobj), |
1126 | &ktype_cache, | 1105 | &ktype_cache, |
1127 | per_cpu(ici_cache_kobject, cpu), | 1106 | per_cpu(ici_cache_kobject, cpu), |
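
Annotation: amd_l3_attrs() replaces the two static attribute tables with one array built lazily on first use, cached in a static pointer, and degraded to the plain table on allocation failure. A standalone sketch of that pattern with simplified types (strings stand in for struct attribute pointers):

	#include <stdio.h>
	#include <stdlib.h>

	static const char *default_attrs[] = { "type", "level", "size", NULL };

	static const char **build_attrs(int want_extras)
	{
		static const char **attrs;
		size_t n, i;

		if (attrs)
			return attrs;			/* built once, then reused */

		n = sizeof(default_attrs) / sizeof(default_attrs[0]);
		if (want_extras)
			n += 2;				/* room for the two extras */

		attrs = calloc(n, sizeof(*attrs));	/* NULL-terminated by calloc */
		if (!attrs)
			return attrs = default_attrs;	/* degrade gracefully */

		for (i = 0; default_attrs[i]; i++)
			attrs[i] = default_attrs[i];
		if (want_extras) {
			attrs[i++] = "cache_disable_0";
			attrs[i++] = "cache_disable_1";
		}
		return attrs;
	}

	int main(void)
	{
		const char **a;

		for (a = build_attrs(1); *a; a++)
			printf("%s\n", *a);
		return 0;
	}
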
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 80c482382d5c..5bf2fac52aca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -31,8 +31,6 @@ | |||
31 | #include <asm/mce.h> | 31 | #include <asm/mce.h> |
32 | #include <asm/msr.h> | 32 | #include <asm/msr.h> |
33 | 33 | ||
34 | #define PFX "mce_threshold: " | ||
35 | #define VERSION "version 1.1.1" | ||
36 | #define NR_BANKS 6 | 34 | #define NR_BANKS 6 |
37 | #define NR_BLOCKS 9 | 35 | #define NR_BLOCKS 9 |
38 | #define THRESHOLD_MAX 0xFFF | 36 | #define THRESHOLD_MAX 0xFFF |
@@ -59,12 +57,6 @@ struct threshold_block { | |||
59 | struct list_head miscj; | 57 | struct list_head miscj; |
60 | }; | 58 | }; |
61 | 59 | ||
62 | /* defaults used early on boot */ | ||
63 | static struct threshold_block threshold_defaults = { | ||
64 | .interrupt_enable = 0, | ||
65 | .threshold_limit = THRESHOLD_MAX, | ||
66 | }; | ||
67 | |||
68 | struct threshold_bank { | 60 | struct threshold_bank { |
69 | struct kobject *kobj; | 61 | struct kobject *kobj; |
70 | struct threshold_block *blocks; | 62 | struct threshold_block *blocks; |
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void); | |||
89 | struct thresh_restart { | 81 | struct thresh_restart { |
90 | struct threshold_block *b; | 82 | struct threshold_block *b; |
91 | int reset; | 83 | int reset; |
84 | int set_lvt_off; | ||
85 | int lvt_off; | ||
92 | u16 old_limit; | 86 | u16 old_limit; |
93 | }; | 87 | }; |
94 | 88 | ||
89 | static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) | ||
90 | { | ||
91 | int msr = (hi & MASK_LVTOFF_HI) >> 20; | ||
92 | |||
93 | if (apic < 0) { | ||
94 | pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " | ||
95 | "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, | ||
96 | b->bank, b->block, b->address, hi, lo); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | if (apic != msr) { | ||
101 | pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " | ||
102 | "for bank %d, block %d (MSR%08X=0x%x%08x)\n", | ||
103 | b->cpu, apic, b->bank, b->block, b->address, hi, lo); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | return 1; | ||
108 | }; | ||
109 | |||
95 | /* must be called with correct cpu affinity */ | 110 | /* must be called with correct cpu affinity */ |
96 | /* Called via smp_call_function_single() */ | 111 | /* Called via smp_call_function_single() */ |
97 | static void threshold_restart_bank(void *_tr) | 112 | static void threshold_restart_bank(void *_tr) |
98 | { | 113 | { |
99 | struct thresh_restart *tr = _tr; | 114 | struct thresh_restart *tr = _tr; |
100 | u32 mci_misc_hi, mci_misc_lo; | 115 | u32 hi, lo; |
101 | 116 | ||
102 | rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); | 117 | rdmsr(tr->b->address, lo, hi); |
103 | 118 | ||
104 | if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 119 | if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) |
105 | tr->reset = 1; /* limit cannot be lower than err count */ | 120 | tr->reset = 1; /* limit cannot be lower than err count */ |
106 | 121 | ||
107 | if (tr->reset) { /* reset err count and overflow bit */ | 122 | if (tr->reset) { /* reset err count and overflow bit */ |
108 | mci_misc_hi = | 123 | hi = |
109 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | 124 | (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | |
110 | (THRESHOLD_MAX - tr->b->threshold_limit); | 125 | (THRESHOLD_MAX - tr->b->threshold_limit); |
111 | } else if (tr->old_limit) { /* change limit w/o reset */ | 126 | } else if (tr->old_limit) { /* change limit w/o reset */ |
112 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 127 | int new_count = (hi & THRESHOLD_MAX) + |
113 | (tr->old_limit - tr->b->threshold_limit); | 128 | (tr->old_limit - tr->b->threshold_limit); |
114 | 129 | ||
115 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 130 | hi = (hi & ~MASK_ERR_COUNT_HI) | |
116 | (new_count & THRESHOLD_MAX); | 131 | (new_count & THRESHOLD_MAX); |
117 | } | 132 | } |
118 | 133 | ||
134 | if (tr->set_lvt_off) { | ||
135 | if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { | ||
136 | /* set new lvt offset */ | ||
137 | hi &= ~MASK_LVTOFF_HI; | ||
138 | hi |= tr->lvt_off << 20; | ||
139 | } | ||
140 | } | ||
141 | |||
119 | tr->b->interrupt_enable ? | 142 | tr->b->interrupt_enable ? |
120 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 143 | (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : |
121 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 144 | (hi &= ~MASK_INT_TYPE_HI); |
122 | 145 | ||
123 | mci_misc_hi |= MASK_COUNT_EN_HI; | 146 | hi |= MASK_COUNT_EN_HI; |
124 | wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); | 147 | wrmsr(tr->b->address, lo, hi); |
148 | } | ||
149 | |||
150 | static void mce_threshold_block_init(struct threshold_block *b, int offset) | ||
151 | { | ||
152 | struct thresh_restart tr = { | ||
153 | .b = b, | ||
154 | .set_lvt_off = 1, | ||
155 | .lvt_off = offset, | ||
156 | }; | ||
157 | |||
158 | b->threshold_limit = THRESHOLD_MAX; | ||
159 | threshold_restart_bank(&tr); | ||
160 | }; | ||
161 | |||
162 | static int setup_APIC_mce(int reserved, int new) | ||
163 | { | ||
164 | if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, | ||
165 | APIC_EILVT_MSG_FIX, 0)) | ||
166 | return new; | ||
167 | |||
168 | return reserved; | ||
125 | } | 169 | } |
126 | 170 | ||
127 | /* cpu init entry point, called from mce.c with preempt off */ | 171 | /* cpu init entry point, called from mce.c with preempt off */ |
128 | void mce_amd_feature_init(struct cpuinfo_x86 *c) | 172 | void mce_amd_feature_init(struct cpuinfo_x86 *c) |
129 | { | 173 | { |
174 | struct threshold_block b; | ||
130 | unsigned int cpu = smp_processor_id(); | 175 | unsigned int cpu = smp_processor_id(); |
131 | u32 low = 0, high = 0, address = 0; | 176 | u32 low = 0, high = 0, address = 0; |
132 | unsigned int bank, block; | 177 | unsigned int bank, block; |
133 | struct thresh_restart tr; | 178 | int offset = -1; |
134 | int lvt_off = -1; | ||
135 | u8 offset; | ||
136 | 179 | ||
137 | for (bank = 0; bank < NR_BANKS; ++bank) { | 180 | for (bank = 0; bank < NR_BANKS; ++bank) { |
138 | for (block = 0; block < NR_BLOCKS; ++block) { | 181 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
163 | if (shared_bank[bank] && c->cpu_core_id) | 206 | if (shared_bank[bank] && c->cpu_core_id) |
164 | break; | 207 | break; |
165 | #endif | 208 | #endif |
166 | offset = (high & MASK_LVTOFF_HI) >> 20; | 209 | offset = setup_APIC_mce(offset, |
167 | if (lvt_off < 0) { | 210 | (high & MASK_LVTOFF_HI) >> 20); |
168 | if (setup_APIC_eilvt(offset, | ||
169 | THRESHOLD_APIC_VECTOR, | ||
170 | APIC_EILVT_MSG_FIX, 0)) { | ||
171 | pr_err(FW_BUG "cpu %d, failed to " | ||
172 | "setup threshold interrupt " | ||
173 | "for bank %d, block %d " | ||
174 | "(MSR%08X=0x%x%08x)", | ||
175 | smp_processor_id(), bank, block, | ||
176 | address, high, low); | ||
177 | continue; | ||
178 | } | ||
179 | lvt_off = offset; | ||
180 | } else if (lvt_off != offset) { | ||
181 | pr_err(FW_BUG "cpu %d, invalid threshold " | ||
182 | "interrupt offset %d for bank %d," | ||
183 | "block %d (MSR%08X=0x%x%08x)", | ||
184 | smp_processor_id(), lvt_off, bank, | ||
185 | block, address, high, low); | ||
186 | continue; | ||
187 | } | ||
188 | |||
189 | high &= ~MASK_LVTOFF_HI; | ||
190 | high |= lvt_off << 20; | ||
191 | wrmsr(address, low, high); | ||
192 | 211 | ||
193 | threshold_defaults.address = address; | 212 | memset(&b, 0, sizeof(b)); |
194 | tr.b = &threshold_defaults; | 213 | b.cpu = cpu; |
195 | tr.reset = 0; | 214 | b.bank = bank; |
196 | tr.old_limit = 0; | 215 | b.block = block; |
197 | threshold_restart_bank(&tr); | 216 | b.address = address; |
198 | 217 | ||
218 | mce_threshold_block_init(&b, offset); | ||
199 | mce_threshold_vector = amd_threshold_interrupt; | 219 | mce_threshold_vector = amd_threshold_interrupt; |
200 | } | 220 | } |
201 | } | 221 | } |
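mce_threshold_block_init() above uses a designated initializer for its thresh_restart, while mce_amd_feature_init() zeroes the on-stack threshold_block with memset() before filling four fields. The two idioms are equivalent, since C zeroes any field a designated initializer leaves unnamed; the memset() variant could just as well read (a sketch, not part of the patch):

	struct threshold_block b = {
		.cpu     = cpu,
		.bank    = bank,
		.block   = block,
		.address = address,
		/* all remaining fields implicitly zero */
	};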
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) | |||
298 | 318 | ||
299 | b->interrupt_enable = !!new; | 319 | b->interrupt_enable = !!new; |
300 | 320 | ||
321 | memset(&tr, 0, sizeof(tr)); | ||
301 | tr.b = b; | 322 | tr.b = b; |
302 | tr.reset = 0; | ||
303 | tr.old_limit = 0; | ||
304 | 323 | ||
305 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 324 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
306 | 325 | ||
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) | |||
321 | if (new < 1) | 340 | if (new < 1) |
322 | new = 1; | 341 | new = 1; |
323 | 342 | ||
343 | memset(&tr, 0, sizeof(tr)); | ||
324 | tr.old_limit = b->threshold_limit; | 344 | tr.old_limit = b->threshold_limit; |
325 | b->threshold_limit = new; | 345 | b->threshold_limit = new; |
326 | tr.b = b; | 346 | tr.b = b; |
327 | tr.reset = 0; | ||
328 | 347 | ||
329 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 348 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
330 | 349 | ||
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
603 | continue; | 622 | continue; |
604 | err = threshold_create_bank(cpu, bank); | 623 | err = threshold_create_bank(cpu, bank); |
605 | if (err) | 624 | if (err) |
606 | goto out; | 625 | return err; |
607 | } | 626 | } |
608 | out: | 627 | |
609 | return err; | 628 | return err; |
610 | } | 629 | } |
611 | 630 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 4b683267eca5..e12246ff5aa6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -53,8 +53,13 @@ struct thermal_state { | |||
53 | struct _thermal_state core_power_limit; | 53 | struct _thermal_state core_power_limit; |
54 | struct _thermal_state package_throttle; | 54 | struct _thermal_state package_throttle; |
55 | struct _thermal_state package_power_limit; | 55 | struct _thermal_state package_power_limit; |
56 | struct _thermal_state core_thresh0; | ||
57 | struct _thermal_state core_thresh1; | ||
56 | }; | 58 | }; |
57 | 59 | ||
60 | /* Callback to handle core threshold interrupts */ | ||
61 | int (*platform_thermal_notify)(__u64 msr_val); | ||
62 | |||
58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 63 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
59 | 64 | ||
60 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 65 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
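platform_thermal_notify is a bare, globally visible function pointer rather than a notifier chain, so a platform driver hooks the new core-threshold events by plain assignment. A hypothetical consumer (the names and body are illustrative only; notify_thresholds() below ignores the return value):

	static int my_thermal_threshold_event(__u64 msr_val)
	{
		pr_info("core thermal threshold crossed, THERM_STATUS=0x%llx\n",
			(unsigned long long)msr_val);
		return 0;
	}

	static int __init my_thermal_hook_init(void)
	{
		platform_thermal_notify = my_thermal_threshold_event;
		return 0;
	}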
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
200 | return 0; | 205 | return 0; |
201 | } | 206 | } |
202 | 207 | ||
208 | static int thresh_event_valid(int event) | ||
209 | { | ||
210 | struct _thermal_state *state; | ||
211 | unsigned int this_cpu = smp_processor_id(); | ||
212 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
213 | u64 now = get_jiffies_64(); | ||
214 | |||
215 | state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; | ||
216 | |||
217 | if (time_before64(now, state->next_check)) | ||
218 | return 0; | ||
219 | |||
220 | state->next_check = now + CHECK_INTERVAL; | ||
221 | return 1; | ||
222 | } | ||
223 | |||
203 | #ifdef CONFIG_SYSFS | 224 | #ifdef CONFIG_SYSFS |
204 | /* Add/Remove thermal_throttle interface for CPU device: */ | 225 | /* Add/Remove thermal_throttle interface for CPU device: */ |
205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | 226 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, |
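thresh_event_valid() is a per-CPU, per-threshold rate limiter: each of the two threshold states carries its own next_check deadline, so at most one event per CHECK_INTERVAL is forwarded in each direction. The same jiffies idiom in isolation (a sketch; CHECK_INTERVAL is defined earlier in therm_throt.c):

	static bool event_due(struct _thermal_state *state)
	{
		u64 now = get_jiffies_64();

		if (time_before64(now, state->next_check))
			return false;		/* still inside the quiet period */

		state->next_check = now + CHECK_INTERVAL;
		return true;
	}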
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device); | |||
313 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | 334 | #define PACKAGE_THROTTLED ((__u64)2 << 62) |
314 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | 335 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) |
315 | 336 | ||
337 | static void notify_thresholds(__u64 msr_val) | ||
338 | { | ||
339 | /* check whether the interrupt handler is defined; | ||
340 | * otherwise simply return | ||
341 | */ | ||
342 | if (!platform_thermal_notify) | ||
343 | return; | ||
344 | |||
345 | /* lower threshold reached */ | ||
346 | if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) | ||
347 | platform_thermal_notify(msr_val); | ||
348 | /* higher threshold reached */ | ||
349 | if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) | ||
350 | platform_thermal_notify(msr_val); | ||
351 | } | ||
352 | |||
316 | /* Thermal transition interrupt handler */ | 353 | /* Thermal transition interrupt handler */ |
317 | static void intel_thermal_interrupt(void) | 354 | static void intel_thermal_interrupt(void) |
318 | { | 355 | { |
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void) | |||
321 | 358 | ||
322 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 359 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
323 | 360 | ||
361 | /* Check for violation of core thermal thresholds */ | ||
362 | notify_thresholds(msr_val); | ||
363 | |||
324 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, | 364 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
325 | THERMAL_THROTTLING_EVENT, | 365 | THERMAL_THROTTLING_EVENT, |
326 | CORE_LEVEL) != 0) | 366 | CORE_LEVEL) != 0) |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 4572f25f9325..cd28a350f7f9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf) | |||
240 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
241 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
242 | #endif | 242 | #endif |
243 | #ifdef CONFIG_X86_MRST_EARLY_PRINTK | 243 | #ifdef CONFIG_EARLY_PRINTK_MRST |
244 | if (!strncmp(buf, "mrst", 4)) { | 244 | if (!strncmp(buf, "mrst", 4)) { |
245 | mrst_early_console_init(); | 245 | mrst_early_console_init(); |
246 | early_console_register(&early_mrst_console, keep); | 246 | early_console_register(&early_mrst_console, keep); |
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf) | |||
250 | hsu_early_console_init(); | 250 | hsu_early_console_init(); |
251 | early_console_register(&early_hsu_console, keep); | 251 | early_console_register(&early_hsu_console, keep); |
252 | } | 252 | } |
253 | |||
254 | #endif | 253 | #endif |
255 | buf++; | 254 | buf++; |
256 | } | 255 | } |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2d..298448656b60 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/module.h> | ||
22 | 23 | ||
23 | #include <trace/syscall.h> | 24 | #include <trace/syscall.h> |
24 | 25 | ||
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); | |||
49 | int ftrace_arch_code_modify_prepare(void) | 50 | int ftrace_arch_code_modify_prepare(void) |
50 | { | 51 | { |
51 | set_kernel_text_rw(); | 52 | set_kernel_text_rw(); |
53 | set_all_modules_text_rw(); | ||
52 | modifying_code = 1; | 54 | modifying_code = 1; |
53 | return 0; | 55 | return 0; |
54 | } | 56 | } |
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) | |||
56 | int ftrace_arch_code_modify_post_process(void) | 58 | int ftrace_arch_code_modify_post_process(void) |
57 | { | 59 | { |
58 | modifying_code = 0; | 60 | modifying_code = 0; |
61 | set_all_modules_text_ro(); | ||
59 | set_kernel_text_ro(); | 62 | set_kernel_text_ro(); |
60 | return 0; | 63 | return 0; |
61 | } | 64 | } |
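These two hooks are callbacks the ftrace core invokes around a batch of text pokes, so with this change the sequence it drives is, in sketch form:

	ftrace_arch_code_modify_prepare();	/* kernel text RW, and now module text too */
	/* ... rewrite mcount call sites, including sites inside modules ... */
	ftrace_arch_code_modify_post_process();	/* module text RO again, then kernel text */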
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 763310165fa0..7f138b3c3c52 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void) | |||
61 | case X86_SUBARCH_MRST: | 61 | case X86_SUBARCH_MRST: |
62 | x86_mrst_early_setup(); | 62 | x86_mrst_early_setup(); |
63 | break; | 63 | break; |
64 | case X86_SUBARCH_CE4100: | ||
65 | x86_ce4100_early_setup(); | ||
66 | break; | ||
64 | default: | 67 | default: |
65 | i386_default_early_setup(); | 68 | i386_default_early_setup(); |
66 | break; | 69 | break; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c0dbd9ac24f0..9f54b209c378 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -139,39 +139,6 @@ ENTRY(startup_32) | |||
139 | movl %eax, pa(olpc_ofw_pgd) | 139 | movl %eax, pa(olpc_ofw_pgd) |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | #ifdef CONFIG_PARAVIRT | ||
143 | /* This can only trip for a broken bootloader... */ | ||
144 | cmpw $0x207, pa(boot_params + BP_version) | ||
145 | jb default_entry | ||
146 | |||
147 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
148 | we're booting under. */ | ||
149 | movl pa(boot_params + BP_hardware_subarch), %eax | ||
150 | cmpl $num_subarch_entries, %eax | ||
151 | jae bad_subarch | ||
152 | |||
153 | movl pa(subarch_entries)(,%eax,4), %eax | ||
154 | subl $__PAGE_OFFSET, %eax | ||
155 | jmp *%eax | ||
156 | |||
157 | bad_subarch: | ||
158 | WEAK(lguest_entry) | ||
159 | WEAK(xen_entry) | ||
160 | /* Unknown implementation; there's really | ||
161 | nothing we can do at this point. */ | ||
162 | ud2a | ||
163 | |||
164 | __INITDATA | ||
165 | |||
166 | subarch_entries: | ||
167 | .long default_entry /* normal x86/PC */ | ||
168 | .long lguest_entry /* lguest hypervisor */ | ||
169 | .long xen_entry /* Xen hypervisor */ | ||
170 | .long default_entry /* Moorestown MID */ | ||
171 | num_subarch_entries = (. - subarch_entries) / 4 | ||
172 | .previous | ||
173 | #endif /* CONFIG_PARAVIRT */ | ||
174 | |||
175 | /* | 142 | /* |
176 | * Initialize page tables. This creates a PDE and a set of page | 143 | * Initialize page tables. This creates a PDE and a set of page |
177 | * tables, which are located immediately beyond __brk_base. The variable | 144 | * tables, which are located immediately beyond __brk_base. The variable |
@@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
181 | * | 148 | * |
182 | * Note that the stack is not yet set up! | 149 | * Note that the stack is not yet set up! |
183 | */ | 150 | */ |
184 | default_entry: | ||
185 | #ifdef CONFIG_X86_PAE | 151 | #ifdef CONFIG_X86_PAE |
186 | 152 | ||
187 | /* | 153 | /* |
@@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
261 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax | 227 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax |
262 | movl %eax,pa(initial_page_table+0xffc) | 228 | movl %eax,pa(initial_page_table+0xffc) |
263 | #endif | 229 | #endif |
264 | jmp 3f | 230 | |
231 | #ifdef CONFIG_PARAVIRT | ||
232 | /* This can only trip for a broken bootloader... */ | ||
233 | cmpw $0x207, pa(boot_params + BP_version) | ||
234 | jb default_entry | ||
235 | |||
236 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
237 | we're booting under. */ | ||
238 | movl pa(boot_params + BP_hardware_subarch), %eax | ||
239 | cmpl $num_subarch_entries, %eax | ||
240 | jae bad_subarch | ||
241 | |||
242 | movl pa(subarch_entries)(,%eax,4), %eax | ||
243 | subl $__PAGE_OFFSET, %eax | ||
244 | jmp *%eax | ||
245 | |||
246 | bad_subarch: | ||
247 | WEAK(lguest_entry) | ||
248 | WEAK(xen_entry) | ||
249 | /* Unknown implementation; there's really | ||
250 | nothing we can do at this point. */ | ||
251 | ud2a | ||
252 | |||
253 | __INITDATA | ||
254 | |||
255 | subarch_entries: | ||
256 | .long default_entry /* normal x86/PC */ | ||
257 | .long lguest_entry /* lguest hypervisor */ | ||
258 | .long xen_entry /* Xen hypervisor */ | ||
259 | .long default_entry /* Moorestown MID */ | ||
260 | num_subarch_entries = (. - subarch_entries) / 4 | ||
261 | .previous | ||
262 | #else | ||
263 | jmp default_entry | ||
264 | #endif /* CONFIG_PARAVIRT */ | ||
265 | |||
265 | /* | 266 | /* |
266 | * Non-boot CPU entry point; entered from trampoline.S | 267 | * Non-boot CPU entry point; entered from trampoline.S |
267 | * We can't lgdt here, because lgdt itself uses a data segment, but | 268 | * We can't lgdt here, because lgdt itself uses a data segment, but |
@@ -282,7 +283,7 @@ ENTRY(startup_32_smp) | |||
282 | movl %eax,%fs | 283 | movl %eax,%fs |
283 | movl %eax,%gs | 284 | movl %eax,%gs |
284 | #endif /* CONFIG_SMP */ | 285 | #endif /* CONFIG_SMP */ |
285 | 3: | 286 | default_entry: |
286 | 287 | ||
287 | /* | 288 | /* |
288 | * New page tables may be in 4Mbyte page mode and may | 289 | * New page tables may be in 4Mbyte page mode and may |
@@ -316,6 +317,10 @@ ENTRY(startup_32_smp) | |||
316 | subl $0x80000001, %eax | 317 | subl $0x80000001, %eax |
317 | cmpl $(0x8000ffff-0x80000001), %eax | 318 | cmpl $(0x8000ffff-0x80000001), %eax |
318 | ja 6f | 319 | ja 6f |
320 | |||
321 | /* Clear bogus XD_DISABLE bits */ | ||
322 | call verify_cpu | ||
323 | |||
319 | mov $0x80000001, %eax | 324 | mov $0x80000001, %eax |
320 | cpuid | 325 | cpuid |
321 | /* Execute Disable bit supported? */ | 326 | /* Execute Disable bit supported? */ |
@@ -611,6 +616,8 @@ ignore_int: | |||
611 | #endif | 616 | #endif |
612 | iret | 617 | iret |
613 | 618 | ||
619 | #include "verify_cpu.S" | ||
620 | |||
614 | __REFDATA | 621 | __REFDATA |
615 | .align 4 | 622 | .align 4 |
616 | ENTRY(initial_code) | 623 | ENTRY(initial_code) |
@@ -622,13 +629,13 @@ ENTRY(initial_code) | |||
622 | __PAGE_ALIGNED_BSS | 629 | __PAGE_ALIGNED_BSS |
623 | .align PAGE_SIZE_asm | 630 | .align PAGE_SIZE_asm |
624 | #ifdef CONFIG_X86_PAE | 631 | #ifdef CONFIG_X86_PAE |
625 | ENTRY(initial_pg_pmd) | 632 | initial_pg_pmd: |
626 | .fill 1024*KPMDS,4,0 | 633 | .fill 1024*KPMDS,4,0 |
627 | #else | 634 | #else |
628 | ENTRY(initial_page_table) | 635 | ENTRY(initial_page_table) |
629 | .fill 1024,4,0 | 636 | .fill 1024,4,0 |
630 | #endif | 637 | #endif |
631 | ENTRY(initial_pg_fixmap) | 638 | initial_pg_fixmap: |
632 | .fill 1024,4,0 | 639 | .fill 1024,4,0 |
633 | ENTRY(empty_zero_page) | 640 | ENTRY(empty_zero_page) |
634 | .fill 4096,1,0 | 641 | .fill 4096,1,0 |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index ce0cb4721c9a..0fe6d1a66c38 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu) | |||
155 | return 0; | 155 | return 0; |
156 | } | 156 | } |
157 | 157 | ||
158 | static int get_ucode_data(void *to, const u8 *from, size_t n) | ||
159 | { | ||
160 | memcpy(to, from, n); | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static void * | 158 | static void * |
165 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | 159 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) |
166 | { | 160 | { |
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
168 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 162 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
169 | void *mc; | 163 | void *mc; |
170 | 164 | ||
171 | if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) | 165 | get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); |
172 | return NULL; | ||
173 | 166 | ||
174 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 167 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
175 | pr_err("error: invalid type field in container file section header\n"); | 168 | pr_err("error: invalid type field in container file section header\n"); |
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
183 | return NULL; | 176 | return NULL; |
184 | } | 177 | } |
185 | 178 | ||
186 | mc = vmalloc(UCODE_MAX_SIZE); | 179 | mc = vzalloc(UCODE_MAX_SIZE); |
187 | if (mc) { | 180 | if (!mc) |
188 | memset(mc, 0, UCODE_MAX_SIZE); | 181 | return NULL; |
189 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, | 182 | |
190 | total_size)) { | 183 | get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); |
191 | vfree(mc); | 184 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
192 | mc = NULL; | 185 | |
193 | } else | ||
194 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | ||
195 | } | ||
196 | return mc; | 186 | return mc; |
197 | } | 187 | } |
198 | 188 | ||
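vzalloc() is vmalloc() plus the zeroing memset(), and testing for failure first flattens the nesting. The transformation this hunk applies, reduced to its pattern (sketch):

	/* before */
	mc = vmalloc(UCODE_MAX_SIZE);
	if (mc) {
		memset(mc, 0, UCODE_MAX_SIZE);
		/* ... fill and use mc ... */
	}

	/* after */
	mc = vzalloc(UCODE_MAX_SIZE);
	if (!mc)
		return NULL;
	/* ... fill and use mc ... */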
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
202 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 192 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
203 | unsigned long size; | 193 | unsigned long size; |
204 | 194 | ||
205 | if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) | 195 | get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); |
206 | return 0; | ||
207 | 196 | ||
208 | size = buf_pos[2]; | 197 | size = buf_pos[2]; |
209 | 198 | ||
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
219 | } | 208 | } |
220 | 209 | ||
221 | buf += UCODE_CONTAINER_HEADER_SIZE; | 210 | buf += UCODE_CONTAINER_HEADER_SIZE; |
222 | if (get_ucode_data(equiv_cpu_table, buf, size)) { | 211 | get_ucode_data(equiv_cpu_table, buf, size); |
223 | vfree(equiv_cpu_table); | ||
224 | return 0; | ||
225 | } | ||
226 | 212 | ||
227 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 213 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
228 | } | 214 | } |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ba0f0ca9f280..c01ffa5b9b87 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -143,7 +143,7 @@ static void flush_gart(void) | |||
143 | 143 | ||
144 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 144 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
145 | if (need_flush) { | 145 | if (need_flush) { |
146 | k8_flush_garts(); | 146 | amd_flush_garts(); |
147 | need_flush = false; | 147 | need_flush = false; |
148 | } | 148 | } |
149 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 149 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
@@ -561,17 +561,17 @@ static void enable_gart_translations(void) | |||
561 | { | 561 | { |
562 | int i; | 562 | int i; |
563 | 563 | ||
564 | if (!k8_northbridges.gart_supported) | 564 | if (!amd_nb_has_feature(AMD_NB_GART)) |
565 | return; | 565 | return; |
566 | 566 | ||
567 | for (i = 0; i < k8_northbridges.num; i++) { | 567 | for (i = 0; i < amd_nb_num(); i++) { |
568 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | 568 | struct pci_dev *dev = node_to_amd_nb(i)->misc; |
569 | 569 | ||
570 | enable_gart_translation(dev, __pa(agp_gatt_table)); | 570 | enable_gart_translation(dev, __pa(agp_gatt_table)); |
571 | } | 571 | } |
572 | 572 | ||
573 | /* Flush the GART-TLB to remove stale entries */ | 573 | /* Flush the GART-TLB to remove stale entries */ |
574 | k8_flush_garts(); | 574 | amd_flush_garts(); |
575 | } | 575 | } |
576 | 576 | ||
577 | /* | 577 | /* |
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev) | |||
596 | if (!fix_up_north_bridges) | 596 | if (!fix_up_north_bridges) |
597 | return; | 597 | return; |
598 | 598 | ||
599 | if (!k8_northbridges.gart_supported) | 599 | if (!amd_nb_has_feature(AMD_NB_GART)) |
600 | return; | 600 | return; |
601 | 601 | ||
602 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); | 602 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
603 | 603 | ||
604 | for (i = 0; i < k8_northbridges.num; i++) { | 604 | for (i = 0; i < amd_nb_num(); i++) { |
605 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | 605 | struct pci_dev *dev = node_to_amd_nb(i)->misc; |
606 | 606 | ||
607 | /* | 607 | /* |
608 | * Don't enable translations just yet. That is the next | 608 | * Don't enable translations just yet. That is the next |
@@ -644,7 +644,7 @@ static struct sys_device device_gart = { | |||
644 | * Private Northbridge GATT initialization in case we cannot use the | 644 | * Private Northbridge GATT initialization in case we cannot use the |
645 | * AGP driver for some reason. | 645 | * AGP driver for some reason. |
646 | */ | 646 | */ |
647 | static __init int init_k8_gatt(struct agp_kern_info *info) | 647 | static __init int init_amd_gatt(struct agp_kern_info *info) |
648 | { | 648 | { |
649 | unsigned aper_size, gatt_size, new_aper_size; | 649 | unsigned aper_size, gatt_size, new_aper_size; |
650 | unsigned aper_base, new_aper_base; | 650 | unsigned aper_base, new_aper_base; |
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
656 | 656 | ||
657 | aper_size = aper_base = info->aper_size = 0; | 657 | aper_size = aper_base = info->aper_size = 0; |
658 | dev = NULL; | 658 | dev = NULL; |
659 | for (i = 0; i < k8_northbridges.num; i++) { | 659 | for (i = 0; i < amd_nb_num(); i++) { |
660 | dev = k8_northbridges.nb_misc[i]; | 660 | dev = node_to_amd_nb(i)->misc; |
661 | new_aper_base = read_aperture(dev, &new_aper_size); | 661 | new_aper_base = read_aperture(dev, &new_aper_size); |
662 | if (!new_aper_base) | 662 | if (!new_aper_base) |
663 | goto nommu; | 663 | goto nommu; |
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void) | |||
725 | if (!no_agp) | 725 | if (!no_agp) |
726 | return; | 726 | return; |
727 | 727 | ||
728 | if (!k8_northbridges.gart_supported) | 728 | if (!amd_nb_has_feature(AMD_NB_GART)) |
729 | return; | 729 | return; |
730 | 730 | ||
731 | for (i = 0; i < k8_northbridges.num; i++) { | 731 | for (i = 0; i < amd_nb_num(); i++) { |
732 | u32 ctl; | 732 | u32 ctl; |
733 | 733 | ||
734 | dev = k8_northbridges.nb_misc[i]; | 734 | dev = node_to_amd_nb(i)->misc; |
735 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); | 735 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); |
736 | 736 | ||
737 | ctl &= ~GARTEN; | 737 | ctl &= ~GARTEN; |
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void) | |||
749 | unsigned long scratch; | 749 | unsigned long scratch; |
750 | long i; | 750 | long i; |
751 | 751 | ||
752 | if (!k8_northbridges.gart_supported) | 752 | if (!amd_nb_has_feature(AMD_NB_GART)) |
753 | return 0; | 753 | return 0; |
754 | 754 | ||
755 | #ifndef CONFIG_AGP_AMD64 | 755 | #ifndef CONFIG_AGP_AMD64 |
756 | no_agp = 1; | 756 | no_agp = 1; |
757 | #else | 757 | #else |
758 | /* Makefile puts PCI initialization via subsys_initcall first. */ | 758 | /* Makefile puts PCI initialization via subsys_initcall first. */ |
759 | /* Add other K8 AGP bridge drivers here */ | 759 | /* Add other AMD AGP bridge drivers here */ |
760 | no_agp = no_agp || | 760 | no_agp = no_agp || |
761 | (agp_amd64_init() < 0) || | 761 | (agp_amd64_init() < 0) || |
762 | (agp_copy_info(agp_bridge, &info) < 0); | 762 | (agp_copy_info(agp_bridge, &info) < 0); |
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void) | |||
765 | if (no_iommu || | 765 | if (no_iommu || |
766 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || | 766 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || |
767 | !gart_iommu_aperture || | 767 | !gart_iommu_aperture || |
768 | (no_agp && init_k8_gatt(&info) < 0)) { | 768 | (no_agp && init_amd_gatt(&info) < 0)) { |
769 | if (max_pfn > MAX_DMA32_PFN) { | 769 | if (max_pfn > MAX_DMA32_PFN) { |
770 | pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); | 770 | pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); |
771 | pr_warning("falling back to iommu=soft.\n"); | 771 | pr_warning("falling back to iommu=soft.\n"); |
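The conversions above all land on the same new northbridge API: amd_nb_has_feature() gates on capabilities such as AMD_NB_GART, amd_nb_num() reports how many northbridges were discovered, and node_to_amd_nb(i)->misc yields the misc PCI function for node i. The recurring loop, extracted (a sketch against the declarations this series puts in <asm/amd_nb.h>):

	int i;

	if (!amd_nb_has_feature(AMD_NB_GART))
		return;

	for (i = 0; i < amd_nb_num(); i++) {
		struct pci_dev *misc = node_to_amd_nb(i)->misc;

		/* program or query this node's GART through "misc" */
	}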
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index fda313ebbb03..c8e41e90f59c 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev) | |||
43 | outb(1, 0x92); | 43 | outb(1, 0x92); |
44 | } | 44 | } |
45 | 45 | ||
46 | static void ce4100_reset(struct pci_dev *dev) | ||
47 | { | ||
48 | int i; | ||
49 | |||
50 | for (i = 0; i < 10; i++) { | ||
51 | outb(0x2, 0xcf9); | ||
52 | udelay(50); | ||
53 | } | ||
54 | } | ||
55 | |||
46 | struct device_fixup { | 56 | struct device_fixup { |
47 | unsigned int vendor; | 57 | unsigned int vendor; |
48 | unsigned int device; | 58 | unsigned int device; |
49 | void (*reboot_fixup)(struct pci_dev *); | 59 | void (*reboot_fixup)(struct pci_dev *); |
50 | }; | 60 | }; |
51 | 61 | ||
62 | /* | ||
63 | * PCI ids solely used for fixups_table go here | ||
64 | */ | ||
65 | #define PCI_DEVICE_ID_INTEL_CE4100 0x0708 | ||
66 | |||
52 | static const struct device_fixup fixups_table[] = { | 67 | static const struct device_fixup fixups_table[] = { |
53 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, | 68 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, |
54 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, | 69 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, |
55 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, | 70 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, |
56 | { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, | 71 | { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, |
72 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset }, | ||
57 | }; | 73 | }; |
58 | 74 | ||
59 | /* | 75 | /* |
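For context, the new CE4100 entry is picked up automatically by the fixup walk elsewhere in this file, which roughly does the following (a paraphrase of the unchanged code, not part of the patch):

	const struct device_fixup *fixup;
	struct pci_dev *dev;
	int i;

	for (i = 0; i < ARRAY_SIZE(fixups_table); i++) {
		fixup = &fixups_table[i];
		dev = pci_get_device(fixup->vendor, fixup->device, NULL);
		if (dev) {
			fixup->reboot_fixup(dev);	/* e.g. ce4100_reset() */
			pci_dev_put(dev);
		}
	}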
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a0f52af256a0..d3cfe26c0252 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void) | |||
705 | void __init setup_arch(char **cmdline_p) | 705 | void __init setup_arch(char **cmdline_p) |
706 | { | 706 | { |
707 | int acpi = 0; | 707 | int acpi = 0; |
708 | int k8 = 0; | 708 | int amd = 0; |
709 | unsigned long flags; | 709 | unsigned long flags; |
710 | 710 | ||
711 | #ifdef CONFIG_X86_32 | 711 | #ifdef CONFIG_X86_32 |
@@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p) | |||
991 | acpi = acpi_numa_init(); | 991 | acpi = acpi_numa_init(); |
992 | #endif | 992 | #endif |
993 | 993 | ||
994 | #ifdef CONFIG_K8_NUMA | 994 | #ifdef CONFIG_AMD_NUMA |
995 | if (!acpi) | 995 | if (!acpi) |
996 | k8 = !k8_numa_init(0, max_pfn); | 996 | amd = !amd_numa_init(0, max_pfn); |
997 | #endif | 997 | #endif |
998 | 998 | ||
999 | initmem_init(0, max_pfn, acpi, k8); | 999 | initmem_init(0, max_pfn, acpi, amd); |
1000 | memblock_find_dma_reserve(); | 1000 | memblock_find_dma_reserve(); |
1001 | dma32_reserve_bootmem(); | 1001 | dma32_reserve_bootmem(); |
1002 | 1002 | ||
@@ -1045,10 +1045,7 @@ void __init setup_arch(char **cmdline_p) | |||
1045 | #endif | 1045 | #endif |
1046 | 1046 | ||
1047 | init_apic_mappings(); | 1047 | init_apic_mappings(); |
1048 | ioapic_init_mappings(); | 1048 | ioapic_and_gsi_init(); |
1049 | |||
1050 | /* need to wait for io_apic is mapped */ | ||
1051 | probe_nr_irqs_gsi(); | ||
1052 | 1049 | ||
1053 | kvm_guest_init(); | 1050 | kvm_guest_init(); |
1054 | 1051 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 68f61ac632e1..ee886fe10ef4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1161,6 +1161,20 @@ out: | |||
1161 | preempt_enable(); | 1161 | preempt_enable(); |
1162 | } | 1162 | } |
1163 | 1163 | ||
1164 | void arch_disable_nonboot_cpus_begin(void) | ||
1165 | { | ||
1166 | /* | ||
1167 | * Avoid the smp alternatives switch during the disable_nonboot_cpus(). | ||
1168 | * In the suspend path, we will be back in SMP mode shortly anyway. | ||
1169 | */ | ||
1170 | skip_smp_alternatives = true; | ||
1171 | } | ||
1172 | |||
1173 | void arch_disable_nonboot_cpus_end(void) | ||
1174 | { | ||
1175 | skip_smp_alternatives = false; | ||
1176 | } | ||
1177 | |||
1164 | void arch_enable_nonboot_cpus_begin(void) | 1178 | void arch_enable_nonboot_cpus_begin(void) |
1165 | { | 1179 | { |
1166 | set_mtrr_aps_delayed_init(); | 1180 | set_mtrr_aps_delayed_init(); |
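The new pair mirrors the existing enable-side hooks. Assuming the generic disable_nonboot_cpus() in kernel/cpu.c calls these weak arch hooks the way its enable counterpart already does, the suspend path becomes (sketch):

	arch_disable_nonboot_cpus_begin();	/* skip_smp_alternatives = true */
	/* ... _cpu_down() each non-boot CPU, without SMP-alternatives rewrites ... */
	arch_disable_nonboot_cpus_end();	/* skip_smp_alternatives = false */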
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 3af2dff58b21..075d130efcf9 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -127,7 +127,7 @@ startup_64: | |||
127 | no_longmode: | 127 | no_longmode: |
128 | hlt | 128 | hlt |
129 | jmp no_longmode | 129 | jmp no_longmode |
130 | #include "verify_cpu_64.S" | 130 | #include "verify_cpu.S" |
131 | 131 | ||
132 | # Careful these need to be in the same 64K segment as the above; | 132 | # Careful these need to be in the same 64K segment as the above; |
133 | tidt: | 133 | tidt: |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0c40d8b72416..356a0d455cf9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void) | |||
872 | 872 | ||
873 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 873 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
874 | return 0; | 874 | return 0; |
875 | |||
876 | if (tsc_clocksource_reliable) | ||
877 | return 0; | ||
875 | /* | 878 | /* |
876 | * Intel systems are normally all synchronized. | 879 | * Intel systems are normally all synchronized. |
877 | * Exceptions must mark TSC as unstable: | 880 | * Exceptions must mark TSC as unstable: |
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void) | |||
879 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | 882 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { |
880 | /* assume multi socket systems are not synchronized: */ | 883 | /* assume multi socket systems are not synchronized: */ |
881 | if (num_possible_cpus() > 1) | 884 | if (num_possible_cpus() > 1) |
882 | tsc_unstable = 1; | 885 | return 1; |
883 | } | 886 | } |
884 | 887 | ||
885 | return tsc_unstable; | 888 | return 0; |
889 | } | ||
890 | |||
891 | |||
892 | static void tsc_refine_calibration_work(struct work_struct *work); | ||
893 | static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); | ||
894 | /** | ||
895 | * tsc_refine_calibration_work - Further refine tsc freq calibration | ||
896 | * @work - ignored. | ||
897 | * | ||
898 | * This functions uses delayed work over a period of a | ||
899 | * second to further refine the TSC freq value. Since this is | ||
900 | * timer based, instead of loop based, we don't block the boot | ||
901 | * process while this longer calibration is done. | ||
902 | * | ||
903 | * If there are any calibration anomolies (too many SMIs, etc), | ||
904 | * or the refined calibration is off by 1% of the fast early | ||
905 | * calibration, we throw out the new calibration and use the | ||
906 | * early calibration. | ||
907 | */ | ||
908 | static void tsc_refine_calibration_work(struct work_struct *work) | ||
909 | { | ||
910 | static u64 tsc_start = -1, ref_start; | ||
911 | static int hpet; | ||
912 | u64 tsc_stop, ref_stop, delta; | ||
913 | unsigned long freq; | ||
914 | |||
915 | /* Don't bother refining TSC on unstable systems */ | ||
916 | if (check_tsc_unstable()) | ||
917 | goto out; | ||
918 | |||
919 | /* | ||
920 | * Since the work is started early in boot, we may be | ||
921 | * delayed the first time we expire. So set the workqueue | ||
922 | * again once we know timers are working. | ||
923 | */ | ||
924 | if (tsc_start == -1) { | ||
925 | /* | ||
926 | * Only set hpet once, to avoid mixing hardware | ||
927 | * if the hpet becomes enabled later. | ||
928 | */ | ||
929 | hpet = is_hpet_enabled(); | ||
930 | schedule_delayed_work(&tsc_irqwork, HZ); | ||
931 | tsc_start = tsc_read_refs(&ref_start, hpet); | ||
932 | return; | ||
933 | } | ||
934 | |||
935 | tsc_stop = tsc_read_refs(&ref_stop, hpet); | ||
936 | |||
937 | /* hpet or pmtimer available ? */ | ||
938 | if (!hpet && !ref_start && !ref_stop) | ||
939 | goto out; | ||
940 | |||
941 | /* Check whether the sampling was disturbed by an SMI */ | ||
942 | if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) | ||
943 | goto out; | ||
944 | |||
945 | delta = tsc_stop - tsc_start; | ||
946 | delta *= 1000000LL; | ||
947 | if (hpet) | ||
948 | freq = calc_hpet_ref(delta, ref_start, ref_stop); | ||
949 | else | ||
950 | freq = calc_pmtimer_ref(delta, ref_start, ref_stop); | ||
951 | |||
952 | /* Make sure we're within 1% */ | ||
953 | if (abs(tsc_khz - freq) > tsc_khz/100) | ||
954 | goto out; | ||
955 | |||
956 | tsc_khz = freq; | ||
957 | printk(KERN_INFO "Refined TSC clocksource calibration: " | ||
958 | "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, | ||
959 | (unsigned long)tsc_khz % 1000); | ||
960 | |||
961 | out: | ||
962 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | ||
886 | } | 963 | } |
887 | 964 | ||
888 | static void __init init_tsc_clocksource(void) | 965 | |
966 | static int __init init_tsc_clocksource(void) | ||
889 | { | 967 | { |
968 | if (!cpu_has_tsc || tsc_disabled > 0) | ||
969 | return 0; | ||
970 | |||
890 | if (tsc_clocksource_reliable) | 971 | if (tsc_clocksource_reliable) |
891 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 972 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
892 | /* lower the rating if we already know its unstable: */ | 973 | /* lower the rating if we already know its unstable: */ |
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void) | |||
894 | clocksource_tsc.rating = 0; | 975 | clocksource_tsc.rating = 0; |
895 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 976 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; |
896 | } | 977 | } |
897 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 978 | schedule_delayed_work(&tsc_irqwork, 0); |
979 | return 0; | ||
898 | } | 980 | } |
981 | /* | ||
982 | * We use device_initcall here to ensure we run after the hpet | ||
983 | * is fully initialized, which may occur at fs_initcall time. | ||
984 | */ | ||
985 | device_initcall(init_tsc_clocksource); | ||
899 | 986 | ||
900 | void __init tsc_init(void) | 987 | void __init tsc_init(void) |
901 | { | 988 | { |
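The acceptance test at the heart of the refinement is the 1% band: the refined value replaces tsc_khz only when it stays within one percent of the early boot calibration. Isolated for clarity (a sketch; values in kHz, signed so that abs() is well-defined):

	static bool refined_freq_acceptable(long early_khz, long refined_khz)
	{
		/* reject refinements drifting more than 1% from the boot value */
		return abs(refined_khz - early_khz) <= early_khz / 100;
	}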
@@ -949,6 +1036,5 @@ void __init tsc_init(void) | |||
949 | mark_tsc_unstable("TSCs unsynchronized"); | 1036 | mark_tsc_unstable("TSCs unsynchronized"); |
950 | 1037 | ||
951 | check_system_tsc_reliable(); | 1038 | check_system_tsc_reliable(); |
952 | init_tsc_clocksource(); | ||
953 | } | 1039 | } |
954 | 1040 | ||
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S index 56a8c2a867d9..0edefc19a113 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (c) 2007 Andi Kleen (ak@suse.de) | 7 | * Copyright (c) 2007 Andi Kleen (ak@suse.de) |
8 | * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) | 8 | * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) |
9 | * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) | 9 | * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) |
10 | * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) | ||
10 | * | 11 | * |
11 | * This source code is licensed under the GNU General Public License, | 12 | * This source code is licensed under the GNU General Public License, |
12 | * Version 2. See the file COPYING for more details. | 13 | * Version 2. See the file COPYING for more details. |
@@ -14,18 +15,17 @@ | |||
14 | * This is a common code for verification whether CPU supports | 15 | * This is a common code for verification whether CPU supports |
15 | * long mode and SSE or not. It is not called directly; instead this | 16 | * long mode and SSE or not. It is not called directly; instead this |
16 | * file is included at various places and compiled in that context. | 17 | * file is included at various places and compiled in that context. |
17 | * Following are the current usage. | 18 | * This file is expected to run in 32bit code. Currently: |
18 | * | 19 | * |
19 | * This file is included by both 16bit and 32bit code. | 20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification |
21 | * arch/x86/kernel/trampoline_64.S: secondary processor verification | ||
22 | * arch/x86/kernel/head_32.S: processor startup | ||
20 | * | 23 | * |
21 | * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) | 24 | * verify_cpu, returns the status of longmode and SSE in register %eax. |
22 | * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) | ||
23 | * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit) | ||
24 | * arch/x86_64/kernel/acpi/wakeup.S: Verification at resume (16bit) | ||
25 | * | ||
26 | * verify_cpu, returns the status of cpu check in register %eax. | ||
27 | * 0: Success 1: Failure | 25 | * 0: Success 1: Failure |
28 | * | 26 | * |
27 | * On Intel, the XD_DISABLE flag will be cleared as a side-effect. | ||
28 | * | ||
29 | * The caller needs to check for the error code and take the action | 29 | * The caller needs to check for the error code and take the action |
30 | * appropriately. Either display a message or halt. | 30 | * appropriately. Either display a message or halt. |
31 | */ | 31 | */ |
@@ -62,8 +62,41 @@ verify_cpu: | |||
62 | cmpl $0x444d4163,%ecx | 62 | cmpl $0x444d4163,%ecx |
63 | jnz verify_cpu_noamd | 63 | jnz verify_cpu_noamd |
64 | mov $1,%di # cpu is from AMD | 64 | mov $1,%di # cpu is from AMD |
65 | jmp verify_cpu_check | ||
65 | 66 | ||
66 | verify_cpu_noamd: | 67 | verify_cpu_noamd: |
68 | cmpl $0x756e6547,%ebx # GenuineIntel? | ||
69 | jnz verify_cpu_check | ||
70 | cmpl $0x49656e69,%edx | ||
71 | jnz verify_cpu_check | ||
72 | cmpl $0x6c65746e,%ecx | ||
73 | jnz verify_cpu_check | ||
74 | |||
75 | # only touch IA32_MISC_ENABLE when: | ||
76 | # family > 6 || (family == 6 && model >= 0xd) | ||
77 | movl $0x1, %eax # check CPU family and model | ||
78 | cpuid | ||
79 | movl %eax, %ecx | ||
80 | |||
81 | andl $0x0ff00f00, %eax # mask family and extended family | ||
82 | shrl $8, %eax | ||
83 | cmpl $6, %eax | ||
84 | ja verify_cpu_clear_xd # family > 6, ok | ||
85 | jb verify_cpu_check # family < 6, skip | ||
86 | |||
87 | andl $0x000f00f0, %ecx # mask model and extended model | ||
88 | shrl $4, %ecx | ||
89 | cmpl $0xd, %ecx | ||
90 | jb verify_cpu_check # family == 6, model < 0xd, skip | ||
91 | |||
92 | verify_cpu_clear_xd: | ||
93 | movl $MSR_IA32_MISC_ENABLE, %ecx | ||
94 | rdmsr | ||
95 | btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE | ||
96 | jnc verify_cpu_check # only write MSR if bit was changed | ||
97 | wrmsr | ||
98 | |||
99 | verify_cpu_check: | ||
67 | movl $0x1,%eax # Does the cpu have what it takes | 100 | movl $0x1,%eax # Does the cpu have what it takes |
68 | cpuid | 101 | cpuid |
69 | andl $REQUIRED_MASK0,%edx | 102 | andl $REQUIRED_MASK0,%edx |
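The new Intel branch gates on family/model before touching the MSR, then clears bit 2 of %edx after rdmsr, i.e. bit 34 of MSR_IA32_MISC_ENABLE, which is XD_DISABLE. A loose C paraphrase for readability only (the real check must run in early 32-bit assembly, long before C or these kernel helpers are usable; extended-family handling is simplified):

	static void clear_bogus_xd_disable(void)
	{
		unsigned int eax, ebx, ecx, edx, family, model;
		u64 misc;

		cpuid(1, &eax, &ebx, &ecx, &edx);
		family = (eax >> 8) & 0xf;
		model = ((eax >> 12) & 0xf0) | ((eax >> 4) & 0xf);

		/* only family > 6, or family 6 with model >= 0xd, has the bit */
		if (family < 6 || (family == 6 && model < 0xd))
			return;

		rdmsrl(MSR_IA32_MISC_ENABLE, misc);
		if (misc & (1ULL << 34))	/* bit 34 == XD_DISABLE */
			wrmsrl(MSR_IA32_MISC_ENABLE, misc & ~(1ULL << 34));
	}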
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e03530aebfd0..bf4700755184 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -69,7 +69,7 @@ jiffies_64 = jiffies; | |||
69 | 69 | ||
70 | PHDRS { | 70 | PHDRS { |
71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
72 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | user PT_LOAD FLAGS(5); /* R_E */ | 74 | user PT_LOAD FLAGS(5); /* R_E */ |
75 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
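The FLAGS() argument is the raw ELF p_flags bitmask, so the 7 -> 6 change is exactly "drop execute from the data segment". In constants:

	/* ELF p_flags bits behind the FLAGS() values above */
	#define PF_X 0x1	/* execute */
	#define PF_W 0x2	/* write   */
	#define PF_R 0x4	/* read    */

	/* FLAGS(5) == PF_R | PF_X        -> R_E (text)             */
	/* FLAGS(6) == PF_R | PF_W        -> RW_ (data, after patch) */
	/* FLAGS(7) == PF_R | PF_W | PF_X -> RWE (data, before)      */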
@@ -116,6 +116,10 @@ SECTIONS | |||
116 | 116 | ||
117 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
118 | 118 | ||
119 | #if defined(CONFIG_DEBUG_RODATA) | ||
120 | /* .text should occupy whole number of pages */ | ||
121 | . = ALIGN(PAGE_SIZE); | ||
122 | #endif | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | 123 | X64_ALIGN_DEBUG_RODATA_BEGIN |
120 | RO_DATA(PAGE_SIZE) | 124 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | 125 | X64_ALIGN_DEBUG_RODATA_END |
@@ -335,7 +339,7 @@ SECTIONS | |||
335 | __bss_start = .; | 339 | __bss_start = .; |
336 | *(.bss..page_aligned) | 340 | *(.bss..page_aligned) |
337 | *(.bss) | 341 | *(.bss) |
338 | . = ALIGN(4); | 342 | . = ALIGN(PAGE_SIZE); |
339 | __bss_stop = .; | 343 | __bss_stop = .; |
340 | } | 344 | } |
341 | 345 | ||
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index e7d5382ef263..4f420c2f2d55 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -4,7 +4,6 @@ | |||
4 | #include <asm/asm-offsets.h> | 4 | #include <asm/asm-offsets.h> |
5 | #include <asm/thread_info.h> | 5 | #include <asm/thread_info.h> |
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | #include <asm/pgtable.h> | ||
8 | 7 | ||
9 | /*G:020 | 8 | /*G:020 |
10 | * Our story starts with the kernel booting into startup_32 in | 9 | * Our story starts with the kernel booting into startup_32 in |
@@ -38,113 +37,9 @@ ENTRY(lguest_entry) | |||
38 | /* Set up the initial stack so we can run C code. */ | 37 | /* Set up the initial stack so we can run C code. */ |
39 | movl $(init_thread_union+THREAD_SIZE),%esp | 38 | movl $(init_thread_union+THREAD_SIZE),%esp |
40 | 39 | ||
41 | call init_pagetables | ||
42 | |||
43 | /* Jumps are relative: we're running __PAGE_OFFSET too low. */ | 40 | /* Jumps are relative: we're running __PAGE_OFFSET too low. */ |
44 | jmp lguest_init+__PAGE_OFFSET | 41 | jmp lguest_init+__PAGE_OFFSET |
45 | 42 | ||
46 | /* | ||
47 | * Initialize page tables. This creates a PDE and a set of page | ||
48 | * tables, which are located immediately beyond __brk_base. The variable | ||
49 | * _brk_end is set up to point to the first "safe" location. | ||
50 | * Mappings are created both at virtual address 0 (identity mapping) | ||
51 | * and PAGE_OFFSET for up to _end. | ||
52 | * | ||
53 | * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they | ||
54 | * don't have a stack at this point, so we can't just use call and ret. | ||
55 | */ | ||
56 | init_pagetables: | ||
57 | #if PTRS_PER_PMD > 1 | ||
58 | #define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) | ||
59 | #else | ||
60 | #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) | ||
61 | #endif | ||
62 | #define pa(X) ((X) - __PAGE_OFFSET) | ||
63 | |||
64 | /* Enough space to fit pagetables for the low memory linear map */ | ||
65 | MAPPING_BEYOND_END = \ | ||
66 | PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT | ||
67 | #ifdef CONFIG_X86_PAE | ||
68 | |||
69 | /* | ||
70 | * In PAE mode initial_page_table is statically defined to contain | ||
71 | * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 | ||
72 | * entries). The identity mapping is handled by pointing two PGD entries | ||
73 | * to the first kernel PMD. | ||
74 | * | ||
75 | * Note the upper half of each PMD or PTE are always zero at this stage. | ||
76 | */ | ||
77 | |||
78 | #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ | ||
79 | |||
80 | xorl %ebx,%ebx /* %ebx is kept at zero */ | ||
81 | |||
82 | movl $pa(__brk_base), %edi | ||
83 | movl $pa(initial_pg_pmd), %edx | ||
84 | movl $PTE_IDENT_ATTR, %eax | ||
85 | 10: | ||
86 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ | ||
87 | movl %ecx,(%edx) /* Store PMD entry */ | ||
88 | /* Upper half already zero */ | ||
89 | addl $8,%edx | ||
90 | movl $512,%ecx | ||
91 | 11: | ||
92 | stosl | ||
93 | xchgl %eax,%ebx | ||
94 | stosl | ||
95 | xchgl %eax,%ebx | ||
96 | addl $0x1000,%eax | ||
97 | loop 11b | ||
98 | |||
99 | /* | ||
100 | * End condition: we must map up to the end + MAPPING_BEYOND_END. | ||
101 | */ | ||
102 | movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp | ||
103 | cmpl %ebp,%eax | ||
104 | jb 10b | ||
105 | 1: | ||
106 | addl $__PAGE_OFFSET, %edi | ||
107 | movl %edi, pa(_brk_end) | ||
108 | shrl $12, %eax | ||
109 | movl %eax, pa(max_pfn_mapped) | ||
110 | |||
111 | /* Do early initialization of the fixmap area */ | ||
112 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax | ||
113 | movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) | ||
114 | #else /* Not PAE */ | ||
115 | |||
116 | page_pde_offset = (__PAGE_OFFSET >> 20); | ||
117 | |||
118 | movl $pa(__brk_base), %edi | ||
119 | movl $pa(initial_page_table), %edx | ||
120 | movl $PTE_IDENT_ATTR, %eax | ||
121 | 10: | ||
122 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ | ||
123 | movl %ecx,(%edx) /* Store identity PDE entry */ | ||
124 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | ||
125 | addl $4,%edx | ||
126 | movl $1024, %ecx | ||
127 | 11: | ||
128 | stosl | ||
129 | addl $0x1000,%eax | ||
130 | loop 11b | ||
131 | /* | ||
132 | * End condition: we must map up to the end + MAPPING_BEYOND_END. | ||
133 | */ | ||
134 | movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp | ||
135 | cmpl %ebp,%eax | ||
136 | jb 10b | ||
137 | addl $__PAGE_OFFSET, %edi | ||
138 | movl %edi, pa(_brk_end) | ||
139 | shrl $12, %eax | ||
140 | movl %eax, pa(max_pfn_mapped) | ||
141 | |||
142 | /* Do early initialization of the fixmap area */ | ||
143 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax | ||
144 | movl %eax,pa(initial_page_table+0xffc) | ||
145 | #endif | ||
146 | ret | ||
147 | |||
148 | /*G:055 | 43 | /*G:055 |
149 | * We create a macro which puts the assembler code between lgstart_ and lgend_ | 44 | * We create a macro which puts the assembler code between lgstart_ and lgend_ |
150 | * markers. These templates are put in the .text section: they can't be | 45 | * markers. These templates are put in the .text section: they can't be |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 55543397a8a7..09df2f9a3d69 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | |||
23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 23 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
24 | 24 | ||
25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o | 25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o |
26 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o | 26 | obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o |
27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
28 | 28 | ||
29 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 29 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/amdtopology_64.c index 804a3b6c6e14..51fae9cfdecb 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/amdtopology_64.c | |||
@@ -1,8 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * AMD K8 NUMA support. | 2 | * AMD NUMA support. |
3 | * Discover the memory map and associated nodes. | 3 | * Discover the memory map and associated nodes. |
4 | * | 4 | * |
5 | * This version reads it directly from the K8 northbridge. | 5 | * This version reads it directly from the AMD northbridge. |
6 | * | 6 | * |
7 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 7 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
8 | */ | 8 | */ |
@@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void) | |||
57 | { | 57 | { |
58 | /* | 58 | /* |
59 | * need to get the APIC ID of the BSP so we can use that to | 59 | * need to get the APIC ID of the BSP so we can use that to |
60 | * create apicid_to_node in k8_scan_nodes() | 60 | * create apicid_to_node in amd_scan_nodes() |
61 | */ | 61 | */ |
62 | #ifdef CONFIG_X86_MPPARSE | 62 | #ifdef CONFIG_X86_MPPARSE |
63 | /* | 63 | /* |
@@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void) | |||
69 | early_init_lapic_mapping(); | 69 | early_init_lapic_mapping(); |
70 | } | 70 | } |
71 | 71 | ||
72 | int __init k8_get_nodes(struct bootnode *physnodes) | 72 | int __init amd_get_nodes(struct bootnode *physnodes) |
73 | { | 73 | { |
74 | int i; | 74 | int i; |
75 | int ret = 0; | 75 | int ret = 0; |
@@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes) | |||
82 | return ret; | 82 | return ret; |
83 | } | 83 | } |
84 | 84 | ||
85 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | 85 | int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) |
86 | { | 86 | { |
87 | unsigned long start = PFN_PHYS(start_pfn); | 87 | unsigned long start = PFN_PHYS(start_pfn); |
88 | unsigned long end = PFN_PHYS(end_pfn); | 88 | unsigned long end = PFN_PHYS(end_pfn); |
@@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | |||
194 | return 0; | 194 | return 0; |
195 | } | 195 | } |
196 | 196 | ||
197 | int __init k8_scan_nodes(void) | 197 | int __init amd_scan_nodes(void) |
198 | { | 198 | { |
199 | unsigned int bits; | 199 | unsigned int bits; |
200 | unsigned int cores; | 200 | unsigned int cores; |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7d..947f42abe820 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
364 | /* | 364 | /* |
365 | * We just marked the kernel text read only above, now that | 365 | * We just marked the kernel text read only above, now that |
366 | * we are going to free part of that, we need to make that | 366 | * we are going to free part of that, we need to make that |
367 | * writeable first. | 367 | * writeable and non-executable first. |
368 | */ | 368 | */ |
369 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); | ||
369 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 370 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
370 | 371 | ||
371 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | 372 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e969f9f401b..f89b5bb4e93f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
226 | 226 | ||
227 | static inline int is_kernel_text(unsigned long addr) | 227 | static inline int is_kernel_text(unsigned long addr) |
228 | { | 228 | { |
229 | if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) | 229 | if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) |
230 | return 1; | 230 | return 1; |
231 | return 0; | 231 | return 0; |
232 | } | 232 | } |
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void) | |||
912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
913 | } | 913 | } |
914 | 914 | ||
915 | static void mark_nxdata_nx(void) | ||
916 | { | ||
917 | /* | ||
918 | * When this is called, init has already been executed and released, | ||
919 | * so everything past _etext should be NX. | ||
920 | */ | ||
921 | unsigned long start = PFN_ALIGN(_etext); | ||
922 | /* | ||
923 | * This comes from the is_kernel_text() upper limit, rounded up to HPAGE_SIZE where huge pages are used: | ||
924 | */ | ||
925 | unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; | ||
926 | |||
927 | if (__supported_pte_mask & _PAGE_NX) | ||
928 | printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); | ||
929 | set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); | ||
930 | } | ||
931 | |||
915 | void mark_rodata_ro(void) | 932 | void mark_rodata_ro(void) |
916 | { | 933 | { |
917 | unsigned long start = PFN_ALIGN(_text); | 934 | unsigned long start = PFN_ALIGN(_text); |
@@ -946,6 +963,7 @@ void mark_rodata_ro(void) | |||
946 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 963 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
947 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 964 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
948 | #endif | 965 | #endif |
966 | mark_nxdata_nx(); | ||
949 | } | 967 | } |
950 | #endif | 968 | #endif |
951 | 969 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 7ffc9b727efd..7762a517d69d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata; | |||
264 | static char *cmdline __initdata; | 264 | static char *cmdline __initdata; |
265 | 265 | ||
266 | static int __init setup_physnodes(unsigned long start, unsigned long end, | 266 | static int __init setup_physnodes(unsigned long start, unsigned long end, |
267 | int acpi, int k8) | 267 | int acpi, int amd) |
268 | { | 268 | { |
269 | int nr_nodes = 0; | 269 | int nr_nodes = 0; |
270 | int ret = 0; | 270 | int ret = 0; |
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, | |||
274 | if (acpi) | 274 | if (acpi) |
275 | nr_nodes = acpi_get_nodes(physnodes); | 275 | nr_nodes = acpi_get_nodes(physnodes); |
276 | #endif | 276 | #endif |
277 | #ifdef CONFIG_K8_NUMA | 277 | #ifdef CONFIG_AMD_NUMA |
278 | if (k8) | 278 | if (amd) |
279 | nr_nodes = k8_get_nodes(physnodes); | 279 | nr_nodes = amd_get_nodes(physnodes); |
280 | #endif | 280 | #endif |
281 | /* | 281 | /* |
282 | * Basic sanity checking on the physical node map: there may be errors | 282 | * Basic sanity checking on the physical node map: there may be errors |
283 | * if the SRAT or K8 incorrectly reported the topology or the mem= | 283 | * if the SRAT or AMD code incorrectly reported the topology or the mem= |
284 | * kernel parameter is used. | 284 | * kernel parameter is used. |
285 | */ | 285 | */ |
286 | for (i = 0; i < nr_nodes; i++) { | 286 | for (i = 0; i < nr_nodes; i++) { |
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
549 | * numa=fake command-line option. | 549 | * numa=fake command-line option. |
550 | */ | 550 | */ |
551 | static int __init numa_emulation(unsigned long start_pfn, | 551 | static int __init numa_emulation(unsigned long start_pfn, |
552 | unsigned long last_pfn, int acpi, int k8) | 552 | unsigned long last_pfn, int acpi, int amd) |
553 | { | 553 | { |
554 | u64 addr = start_pfn << PAGE_SHIFT; | 554 | u64 addr = start_pfn << PAGE_SHIFT; |
555 | u64 max_addr = last_pfn << PAGE_SHIFT; | 555 | u64 max_addr = last_pfn << PAGE_SHIFT; |
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn, | |||
557 | int num_nodes; | 557 | int num_nodes; |
558 | int i; | 558 | int i; |
559 | 559 | ||
560 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); | 560 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd); |
561 | /* | 561 | /* |
562 | * If the numa=fake command-line contains a 'M' or 'G', it represents | 562 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
563 | * the fixed node size. Otherwise, if it is just a single number N, | 563 | * the fixed node size. Otherwise, if it is just a single number N, |
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn, | |||
602 | #endif /* CONFIG_NUMA_EMU */ | 602 | #endif /* CONFIG_NUMA_EMU */ |
603 | 603 | ||
604 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | 604 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
605 | int acpi, int k8) | 605 | int acpi, int amd) |
606 | { | 606 | { |
607 | int i; | 607 | int i; |
608 | 608 | ||
@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | |||
610 | nodes_clear(node_online_map); | 610 | nodes_clear(node_online_map); |
611 | 611 | ||
612 | #ifdef CONFIG_NUMA_EMU | 612 | #ifdef CONFIG_NUMA_EMU |
613 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) | 613 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd)) |
614 | return; | 614 | return; |
615 | nodes_clear(node_possible_map); | 615 | nodes_clear(node_possible_map); |
616 | nodes_clear(node_online_map); | 616 | nodes_clear(node_online_map); |
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | |||
624 | nodes_clear(node_online_map); | 624 | nodes_clear(node_online_map); |
625 | #endif | 625 | #endif |
626 | 626 | ||
627 | #ifdef CONFIG_K8_NUMA | 627 | #ifdef CONFIG_AMD_NUMA |
628 | if (!numa_off && k8 && !k8_scan_nodes()) | 628 | if (!numa_off && amd && !amd_scan_nodes()) |
629 | return; | 629 | return; |
630 | nodes_clear(node_possible_map); | 630 | nodes_clear(node_possible_map); |
631 | nodes_clear(node_online_map); | 631 | nodes_clear(node_online_map); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 532e7933d606..8b830ca14ac4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
14 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/pci.h> | ||
16 | 17 | ||
17 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
255 | unsigned long pfn) | 256 | unsigned long pfn) |
256 | { | 257 | { |
257 | pgprot_t forbidden = __pgprot(0); | 258 | pgprot_t forbidden = __pgprot(0); |
259 | pgprot_t required = __pgprot(0); | ||
258 | 260 | ||
259 | /* | 261 | /* |
260 | * The BIOS area between 640k and 1Mb needs to be executable for | 262 | * The BIOS area between 640k and 1Mb needs to be executable for |
261 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. | 263 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. |
262 | */ | 264 | */ |
263 | if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | 265 | #ifdef CONFIG_PCI_BIOS |
266 | if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | ||
264 | pgprot_val(forbidden) |= _PAGE_NX; | 267 | pgprot_val(forbidden) |= _PAGE_NX; |
268 | #endif | ||
265 | 269 | ||
266 | /* | 270 | /* |
267 | * The kernel text needs to be executable for obvious reasons | 271 | * The kernel text needs to be executable for obvious reasons |
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
278 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | 282 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, |
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 283 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 284 | pgprot_val(forbidden) |= _PAGE_RW; |
285 | /* | ||
286 | * .data and .bss should always be writable. | ||
287 | */ | ||
288 | if (within(address, (unsigned long)_sdata, (unsigned long)_edata) || | ||
289 | within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop)) | ||
290 | pgprot_val(required) |= _PAGE_RW; | ||
281 | 291 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | 292 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) |
283 | /* | 293 | /* |
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
317 | #endif | 327 | #endif |
318 | 328 | ||
319 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 329 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
330 | prot = __pgprot(pgprot_val(prot) | pgprot_val(required)); | ||
320 | 331 | ||
321 | return prot; | 332 | return prot; |
322 | } | 333 | } |
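The static_protections() change above adds a second mask: after bits in "forbidden" are stripped from the requested pgprot, bits in "required" are ORed back in, so a .data or .bss page can never lose _PAGE_RW even if a caller asked for a read-only mapping. A minimal standalone sketch of that combination (not kernel code; the bit positions are assumed for illustration):

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_RW ((uint64_t)1 << 1)   /* assumed bit layout, illustration only */
    #define _PAGE_NX ((uint64_t)1 << 63)

    static uint64_t apply_protections(uint64_t prot, uint64_t forbidden,
                                      uint64_t required)
    {
            prot &= ~forbidden;   /* strip bits the range must not have */
            prot |= required;     /* force bits the range must keep */
            return prot;
    }

    int main(void)
    {
            uint64_t request  = _PAGE_NX;   /* caller asks for a read-only, NX page */
            uint64_t required = _PAGE_RW;   /* .data/.bss must stay writable */
            uint64_t prot = apply_protections(request, 0, required);

            printf("RW forced back on: %d\n", !!(prot & _PAGE_RW));
            return 0;
    }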
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
393 | { | 404 | { |
394 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; | 405 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; |
395 | pte_t new_pte, old_pte, *tmp; | 406 | pte_t new_pte, old_pte, *tmp; |
396 | pgprot_t old_prot, new_prot; | 407 | pgprot_t old_prot, new_prot, req_prot; |
397 | int i, do_split = 1; | 408 | int i, do_split = 1; |
398 | unsigned int level; | 409 | unsigned int level; |
399 | 410 | ||
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
438 | * We are safe now. Check whether the new pgprot is the same: | 449 | * We are safe now. Check whether the new pgprot is the same: |
439 | */ | 450 | */ |
440 | old_pte = *kpte; | 451 | old_pte = *kpte; |
441 | old_prot = new_prot = pte_pgprot(old_pte); | 452 | old_prot = new_prot = req_prot = pte_pgprot(old_pte); |
442 | 453 | ||
443 | pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); | 454 | pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); |
444 | pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); | 455 | pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); |
445 | 456 | ||
446 | /* | 457 | /* |
447 | * old_pte points to the large page base address. So we need | 458 | * old_pte points to the large page base address. So we need |
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
450 | pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); | 461 | pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); |
451 | cpa->pfn = pfn; | 462 | cpa->pfn = pfn; |
452 | 463 | ||
453 | new_prot = static_protections(new_prot, address, pfn); | 464 | new_prot = static_protections(req_prot, address, pfn); |
454 | 465 | ||
455 | /* | 466 | /* |
456 | * We need to check the full range, whether | 467 | * We need to check the full range, whether |
457 | * static_protection() requires a different pgprot for one of | 468 | * static_protection() requires a different pgprot for one of |
458 | * the pages in the range we try to preserve: | 469 | * the pages in the range we try to preserve: |
459 | */ | 470 | */ |
460 | addr = address + PAGE_SIZE; | 471 | addr = address & pmask; |
461 | pfn++; | 472 | pfn = pte_pfn(old_pte); |
462 | for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { | 473 | for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { |
463 | pgprot_t chk_prot = static_protections(new_prot, addr, pfn); | 474 | pgprot_t chk_prot = static_protections(req_prot, addr, pfn); |
464 | 475 | ||
465 | if (pgprot_val(chk_prot) != pgprot_val(new_prot)) | 476 | if (pgprot_val(chk_prot) != pgprot_val(new_prot)) |
466 | goto out_unlock; | 477 | goto out_unlock; |
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
483 | * that we limited the number of possible pages already to | 494 | * that we limited the number of possible pages already to |
484 | * the number of pages in the large page. | 495 | * the number of pages in the large page. |
485 | */ | 496 | */ |
486 | if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { | 497 | if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { |
487 | /* | 498 | /* |
488 | * The address is aligned and the number of pages | 499 | * The address is aligned and the number of pages |
489 | * covers the full page. | 500 | * covers the full page. |
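The rewritten loop above now walks every base page of the large page rather than just the pages in the caller's range, and the final test checks alignment against the large-page base. A standalone arithmetic sketch of both tests, assuming a 2 MiB large page over 4 KiB base pages:

    #include <stdio.h>

    int main(void)
    {
            unsigned long psize = 2UL << 20;        /* assumed 2 MiB large page */
            unsigned long pmask = ~(psize - 1);
            unsigned long address = 0x01400000UL;   /* 2 MiB-aligned address */

            /* the new "address == (address & pmask)" alignment test */
            printf("aligned to large-page base: %d\n",
                   address == (address & pmask));
            /* the page count the loop and the numpages test now use */
            printf("base pages per large page: %lu\n", psize >> 12);
            return 0;
    }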
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index a3250aa34086..410531d3c292 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -41,7 +41,7 @@ void __init x86_report_nx(void) | |||
41 | { | 41 | { |
42 | if (!cpu_has_nx) { | 42 | if (!cpu_has_nx) { |
43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
44 | "missing in CPU or disabled in BIOS!\n"); | 44 | "missing in CPU!\n"); |
45 | } else { | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
47 | if (disable_nx) { | 47 | if (disable_nx) { |
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index a17dffd136c1..f16434568a51 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) | |||
92 | /* mark this node as "seen" in node bitmap */ | 92 | /* mark this node as "seen" in node bitmap */ |
93 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); | 93 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); |
94 | 94 | ||
95 | /* don't need to check apic_id here, because it is always 8 bits */ | ||
95 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; | 96 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; |
96 | 97 | ||
97 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", | 98 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b060..171a0aacb99a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
134 | } | 134 | } |
135 | 135 | ||
136 | apic_id = pa->apic_id; | 136 | apic_id = pa->apic_id; |
137 | if (apic_id >= MAX_LOCAL_APIC) { | ||
138 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | ||
139 | return; | ||
140 | } | ||
137 | apicid_to_node[apic_id] = node; | 141 | apicid_to_node[apic_id] = node; |
138 | node_set(node, cpu_nodes_parsed); | 142 | node_set(node, cpu_nodes_parsed); |
139 | acpi_numa = 1; | 143 | acpi_numa = 1; |
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
168 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; | 172 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; |
169 | else | 173 | else |
170 | apic_id = pa->apic_id; | 174 | apic_id = pa->apic_id; |
175 | |||
176 | if (apic_id >= MAX_LOCAL_APIC) { | ||
177 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | ||
178 | return; | ||
179 | } | ||
180 | |||
171 | apicid_to_node[apic_id] = node; | 181 | apicid_to_node[apic_id] = node; |
172 | node_set(node, cpu_nodes_parsed); | 182 | node_set(node, cpu_nodes_parsed); |
173 | acpi_numa = 1; | 183 | acpi_numa = 1; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 51104b33fd51..c3b8e24f2b16 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -610,6 +610,7 @@ static int force_ibs_eilvt_setup(void) | |||
610 | ret = setup_ibs_ctl(i); | 610 | ret = setup_ibs_ctl(i); |
611 | if (ret) | 611 | if (ret) |
612 | return ret; | 612 | return ret; |
613 | pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); | ||
613 | return 0; | 614 | return 0; |
614 | } | 615 | } |
615 | 616 | ||
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index effd96e33f16..6b8759f7634e 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o | |||
7 | obj-$(CONFIG_PCI_XEN) += xen.o | 7 | obj-$(CONFIG_PCI_XEN) += xen.o |
8 | 8 | ||
9 | obj-y += fixup.o | 9 | obj-y += fixup.o |
10 | obj-$(CONFIG_X86_INTEL_CE) += ce4100.o | ||
10 | obj-$(CONFIG_ACPI) += acpi.o | 11 | obj-$(CONFIG_ACPI) += acpi.o |
11 | obj-y += legacy.o irq.o | 12 | obj-y += legacy.o irq.o |
12 | 13 | ||
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c new file mode 100644 index 000000000000..85b68ef5e809 --- /dev/null +++ b/arch/x86/pci/ce4100.c | |||
@@ -0,0 +1,315 @@ | |||
1 | /* | ||
2 | * GPL LICENSE SUMMARY | ||
3 | * | ||
4 | * Copyright(c) 2010 Intel Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of version 2 of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * The full GNU General Public License is included in this distribution | ||
19 | * in the file called LICENSE.GPL. | ||
20 | * | ||
21 | * Contact Information: | ||
22 | * Intel Corporation | ||
23 | * 2200 Mission College Blvd. | ||
24 | * Santa Clara, CA 97052 | ||
25 | * | ||
26 | * This provides access methods for PCI registers that misbehave on | ||
27 | * the CE4100. Each register can be assigned a private init, read and | ||
28 | * write routine. The exception to this is the bridge device. The | ||
29 | * bridge device is the only device on bus zero (0) that requires any | ||
30 | * fixup, so it is a special case at the moment. | ||
31 | */ | ||
32 | |||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/init.h> | ||
36 | |||
37 | #include <asm/pci_x86.h> | ||
38 | |||
39 | struct sim_reg { | ||
40 | u32 value; | ||
41 | u32 mask; | ||
42 | }; | ||
43 | |||
44 | struct sim_dev_reg { | ||
45 | int dev_func; | ||
46 | int reg; | ||
47 | void (*init)(struct sim_dev_reg *reg); | ||
48 | void (*read)(struct sim_dev_reg *reg, u32 *value); | ||
49 | void (*write)(struct sim_dev_reg *reg, u32 value); | ||
50 | struct sim_reg sim_reg; | ||
51 | }; | ||
52 | |||
53 | struct sim_reg_op { | ||
54 | void (*init)(struct sim_dev_reg *reg); | ||
55 | void (*read)(struct sim_dev_reg *reg, u32 value); | ||
56 | void (*write)(struct sim_dev_reg *reg, u32 value); | ||
57 | }; | ||
58 | |||
59 | #define MB (1024 * 1024) | ||
60 | #define KB (1024) | ||
61 | #define SIZE_TO_MASK(size) (~(size - 1)) | ||
62 | |||
63 | #define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\ | ||
64 | { PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ | ||
65 | {0, SIZE_TO_MASK(size)} }, | ||
66 | |||
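SIZE_TO_MASK() follows the usual BAR sizing protocol: software writes all-ones to a BAR and reads it back, and the low bits that refuse to stick encode the region size; the mask stored in sim_reg makes the simulated registers behave the same way. A standalone sketch of that round trip for a hypothetical 64 KB BAR:

    #include <stdio.h>
    #include <stdint.h>

    #define SIZE_TO_MASK(size) (~((size) - 1))

    int main(void)
    {
            uint32_t mask = SIZE_TO_MASK(64 * 1024); /* hypothetical 64 KB BAR */
            uint32_t bar  = 0xFFFFFFFFu & mask;      /* "write ~0, read back" */

            /* prints 0xffff0000, which decodes to size 65536 */
            printf("read-back %#x decodes to size %u\n", bar, ~bar + 1);
            return 0;
    }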
67 | static void reg_init(struct sim_dev_reg *reg) | ||
68 | { | ||
69 | pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, | ||
70 | ®->sim_reg.value); | ||
71 | } | ||
72 | |||
73 | static void reg_read(struct sim_dev_reg *reg, u32 *value) | ||
74 | { | ||
75 | unsigned long flags; | ||
76 | |||
77 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
78 | *value = reg->sim_reg.value; | ||
79 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
80 | } | ||
81 | |||
82 | static void reg_write(struct sim_dev_reg *reg, u32 value) | ||
83 | { | ||
84 | unsigned long flags; | ||
85 | |||
86 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
87 | reg->sim_reg.value = (value & reg->sim_reg.mask) | | ||
88 | (reg->sim_reg.value & ~reg->sim_reg.mask); | ||
89 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
90 | } | ||
91 | |||
92 | static void sata_reg_init(struct sim_dev_reg *reg) | ||
93 | { | ||
94 | pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4, | ||
95 | ®->sim_reg.value); | ||
96 | reg->sim_reg.value += 0x400; | ||
97 | } | ||
98 | |||
99 | static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) | ||
100 | { | ||
101 | reg_read(reg, value); | ||
102 | if (*value != reg->sim_reg.mask) | ||
103 | *value |= 0x100; | ||
104 | } | ||
105 | |||
106 | void sata_revid_init(struct sim_dev_reg *reg) | ||
107 | { | ||
108 | reg->sim_reg.value = 0x01060100; | ||
109 | reg->sim_reg.mask = 0; | ||
110 | } | ||
111 | |||
112 | static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) | ||
113 | { | ||
114 | reg_read(reg, value); | ||
115 | } | ||
116 | |||
117 | static struct sim_dev_reg bus1_fixups[] = { | ||
118 | DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write) | ||
119 | DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write) | ||
120 | DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
121 | DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
122 | DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) | ||
123 | DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write) | ||
124 | DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write) | ||
125 | DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write) | ||
126 | DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
127 | DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write) | ||
128 | DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
129 | DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
130 | DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write) | ||
131 | DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) | ||
132 | DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write) | ||
133 | DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write) | ||
134 | DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write) | ||
135 | DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write) | ||
136 | DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write) | ||
137 | DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write) | ||
138 | DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write) | ||
139 | DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write) | ||
140 | DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write) | ||
141 | DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write) | ||
142 | DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write) | ||
143 | DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
144 | DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write) | ||
145 | DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
146 | DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) | ||
147 | DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write) | ||
148 | DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write) | ||
149 | DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) | ||
150 | DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) | ||
151 | DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0) | ||
152 | DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write) | ||
153 | DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write) | ||
154 | DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write) | ||
155 | DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write) | ||
156 | DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write) | ||
157 | DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write) | ||
158 | DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
159 | DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) | ||
160 | DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) | ||
161 | DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write) | ||
162 | DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write) | ||
163 | DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) | ||
164 | DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write) | ||
165 | }; | ||
166 | |||
167 | static void __init init_sim_regs(void) | ||
168 | { | ||
169 | int i; | ||
170 | |||
171 | for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { | ||
172 | if (bus1_fixups[i].init) | ||
173 | bus1_fixups[i].init(&bus1_fixups[i]); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static inline void extract_bytes(u32 *value, int reg, int len) | ||
178 | { | ||
179 | uint32_t mask; | ||
180 | |||
181 | *value >>= ((reg & 3) * 8); | ||
182 | mask = 0xFFFFFFFF >> ((4 - len) * 8); | ||
183 | *value &= mask; | ||
184 | } | ||
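extract_bytes() narrows a 32-bit simulated register down to the byte or word the caller actually requested. A standalone copy of the helper with a worked one-byte read; the value 0x01060100 mirrors the simulated SATA class/revision dword set up by sata_revid_init() above:

    #include <stdio.h>
    #include <stdint.h>

    static void extract_bytes(uint32_t *value, int reg, int len)
    {
            *value >>= ((reg & 3) * 8);                 /* shift to byte offset */
            *value &= 0xFFFFFFFFu >> ((4 - len) * 8);   /* mask to request length */
    }

    int main(void)
    {
            uint32_t v = 0x01060100;    /* simulated class/revision dword */
            extract_bytes(&v, 0x0b, 1); /* one-byte read at offset 0x0b */
            printf("%#x\n", v);         /* prints 0x1 (base-class byte) */
            return 0;
    }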
185 | |||
186 | int bridge_read(unsigned int devfn, int reg, int len, u32 *value) | ||
187 | { | ||
188 | u32 av_bridge_base, av_bridge_limit; | ||
189 | int retval = 0; | ||
190 | |||
191 | switch (reg) { | ||
192 | /* Make BARs appear to not request any memory. */ | ||
193 | case PCI_BASE_ADDRESS_0: | ||
194 | case PCI_BASE_ADDRESS_0 + 1: | ||
195 | case PCI_BASE_ADDRESS_0 + 2: | ||
196 | case PCI_BASE_ADDRESS_0 + 3: | ||
197 | *value = 0; | ||
198 | break; | ||
199 | |||
200 | /* The subordinate bus number register is hardwired | ||
201 | * to zero and read-only, so simulate it here. | ||
202 | */ | ||
203 | case PCI_PRIMARY_BUS: | ||
204 | if (len == 4) | ||
205 | *value = 0x00010100; | ||
206 | break; | ||
207 | |||
208 | case PCI_SUBORDINATE_BUS: | ||
209 | *value = 1; | ||
210 | break; | ||
211 | |||
212 | case PCI_MEMORY_BASE: | ||
213 | case PCI_MEMORY_LIMIT: | ||
214 | /* Get the A/V bridge base address. */ | ||
215 | pci_direct_conf1.read(0, 0, devfn, | ||
216 | PCI_BASE_ADDRESS_0, 4, &av_bridge_base); | ||
217 | |||
218 | av_bridge_limit = av_bridge_base + (512*MB - 1); | ||
219 | av_bridge_limit >>= 16; | ||
220 | av_bridge_limit &= 0xFFF0; | ||
221 | |||
222 | av_bridge_base >>= 16; | ||
223 | av_bridge_base &= 0xFFF0; | ||
224 | |||
225 | if (reg == PCI_MEMORY_LIMIT) | ||
226 | *value = av_bridge_limit; | ||
227 | else if (len == 2) | ||
228 | *value = av_bridge_base; | ||
229 | else | ||
230 | *value = (av_bridge_limit << 16) | av_bridge_base; | ||
231 | break; | ||
232 | /* Make the prefetchable memory limit smaller than the prefetchable | ||
233 | * memory base, so no prefetchable memory space is claimed. | ||
234 | */ | ||
235 | case PCI_PREF_MEMORY_BASE: | ||
236 | *value = 0xFFF0; | ||
237 | break; | ||
238 | case PCI_PREF_MEMORY_LIMIT: | ||
239 | *value = 0x0; | ||
240 | break; | ||
241 | /* Make the IO limit smaller than the IO base, so no IO space is claimed. */ | ||
242 | case PCI_IO_BASE: | ||
243 | *value = 0xF0; | ||
244 | break; | ||
245 | case PCI_IO_LIMIT: | ||
246 | *value = 0; | ||
247 | break; | ||
248 | default: | ||
249 | retval = 1; | ||
250 | } | ||
251 | return retval; | ||
252 | } | ||
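The PCI_MEMORY_BASE/PCI_MEMORY_LIMIT cases above rely on the bridge window encoding: the 16-bit window registers carry address bits 31:20 in their upper 12 bits, hence the shift by 16 and the 0xFFF0 mask. A standalone sketch with a hypothetical A/V bridge base address:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t base  = 0xA0000000u;              /* hypothetical A/V BAR */
            uint32_t limit = base + (512u << 20) - 1;  /* 512 MB window */

            uint16_t base_reg  = (base  >> 16) & 0xFFF0;
            uint16_t limit_reg = (limit >> 16) & 0xFFF0;

            /* prints base reg 0xa000, limit reg 0xbff0 */
            printf("base reg %#x, limit reg %#x\n",
                   (unsigned)base_reg, (unsigned)limit_reg);
            return 0;
    }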
253 | |||
254 | static int ce4100_conf_read(unsigned int seg, unsigned int bus, | ||
255 | unsigned int devfn, int reg, int len, u32 *value) | ||
256 | { | ||
257 | int i, retval = 1; | ||
258 | |||
259 | if (bus == 1) { | ||
260 | for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { | ||
261 | if (bus1_fixups[i].dev_func == devfn && | ||
262 | bus1_fixups[i].reg == (reg & ~3) && | ||
263 | bus1_fixups[i].read) { | ||
264 | bus1_fixups[i].read(&(bus1_fixups[i]), | ||
265 | value); | ||
266 | extract_bytes(value, reg, len); | ||
267 | return 0; | ||
268 | } | ||
269 | } | ||
270 | } | ||
271 | |||
272 | if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && | ||
273 | !bridge_read(devfn, reg, len, value)) | ||
274 | return 0; | ||
275 | |||
276 | return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); | ||
277 | } | ||
278 | |||
279 | static int ce4100_conf_write(unsigned int seg, unsigned int bus, | ||
280 | unsigned int devfn, int reg, int len, u32 value) | ||
281 | { | ||
282 | int i; | ||
283 | |||
284 | if (bus == 1) { | ||
285 | for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { | ||
286 | if (bus1_fixups[i].dev_func == devfn && | ||
287 | bus1_fixups[i].reg == (reg & ~3) && | ||
288 | bus1_fixups[i].write) { | ||
289 | bus1_fixups[i].write(&(bus1_fixups[i]), | ||
290 | value); | ||
291 | return 0; | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /* Discard writes to A/V bridge BAR. */ | ||
297 | if (bus == 0 && PCI_DEVFN(1, 0) == devfn && | ||
298 | ((reg & ~3) == PCI_BASE_ADDRESS_0)) | ||
299 | return 0; | ||
300 | |||
301 | return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); | ||
302 | } | ||
303 | |||
304 | struct pci_raw_ops ce4100_pci_conf = { | ||
305 | .read = ce4100_conf_read, | ||
306 | .write = ce4100_conf_write, | ||
307 | }; | ||
308 | |||
309 | static int __init ce4100_pci_init(void) | ||
310 | { | ||
311 | init_sim_regs(); | ||
312 | raw_pci_ops = &ce4100_pci_conf; | ||
313 | return 0; | ||
314 | } | ||
315 | subsys_initcall(ce4100_pci_init); | ||
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 2492d165096a..a5f7d0d63de0 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
11 | #include <asm/pci-functions.h> | 11 | #include <asm/pci-functions.h> |
12 | #include <asm/cacheflush.h> | ||
12 | 13 | ||
13 | /* BIOS32 signature: "_32_" */ | 14 | /* BIOS32 signature: "_32_" */ |
14 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) | 15 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) |
@@ -25,6 +26,27 @@ | |||
25 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 | 26 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 |
26 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 | 27 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 |
27 | 28 | ||
29 | int pcibios_enabled; | ||
30 | |||
31 | /* According to the BIOS specification at: | ||
32 | * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could | ||
33 | * restrict the x zone to some pages and make it ro. But this may be | ||
34 | * broken on some BIOSes and is complex to handle with static_protections. | ||
35 | * We could make the 0xe0000-0x100000 range rox, but this can break | ||
36 | * some ISA mapping. | ||
37 | * | ||
38 | * So we leave an rw and x hole when pcibios is used. This shouldn't | ||
39 | * happen on modern systems with mmconfig, and if you don't want it | ||
40 | * you can disable pcibios... | ||
41 | */ | ||
42 | static inline void set_bios_x(void) | ||
43 | { | ||
44 | pcibios_enabled = 1; | ||
45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); | ||
46 | if (__supported_pte_mask & _PAGE_NX) | ||
47 | printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n"); | ||
48 | } | ||
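The set_memory_x() call above converts the BIOS window into a page count. A quick standalone check of that arithmetic, assuming the conventional BIOS_BEGIN/BIOS_END values of 0xa0000 and 0x100000 and 4 KB pages:

    #include <stdio.h>

    int main(void)
    {
            unsigned long bios_begin = 0xa0000;   /* assumed BIOS_BEGIN */
            unsigned long bios_end   = 0x100000;  /* assumed BIOS_END */

            /* prints 96: the pages set_memory_x() marks executable */
            printf("%lu pages made executable\n",
                   (bios_end - bios_begin) >> 12);
            return 0;
    }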
49 | |||
28 | /* | 50 | /* |
29 | * This is the standard structure used to identify the entry point | 51 | * This is the standard structure used to identify the entry point |
30 | * to the BIOS32 Service Directory, as documented in | 52 | * to the BIOS32 Service Directory, as documented in |
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) | |||
332 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", | 354 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", |
333 | bios32_entry); | 355 | bios32_entry); |
334 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; | 356 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; |
357 | set_bios_x(); | ||
335 | if (check_pcibios()) | 358 | if (check_pcibios()) |
336 | return &pci_bios_access; | 359 | return &pci_bios_access; |
337 | } | 360 | } |
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 7bf70b812fa2..021eee91c056 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile | |||
@@ -1,5 +1,7 @@ | |||
1 | # Platform specific code goes here | 1 | # Platform specific code goes here |
2 | obj-y += ce4100/ | ||
2 | obj-y += efi/ | 3 | obj-y += efi/ |
4 | obj-y += iris/ | ||
3 | obj-y += mrst/ | 5 | obj-y += mrst/ |
4 | obj-y += olpc/ | 6 | obj-y += olpc/ |
5 | obj-y += scx200/ | 7 | obj-y += scx200/ |
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile new file mode 100644 index 000000000000..91fc92971d94 --- /dev/null +++ b/arch/x86/platform/ce4100/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o | |||
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c new file mode 100644 index 000000000000..d2c0d51a7178 --- /dev/null +++ b/arch/x86/platform/ce4100/ce4100.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * Intel CE4100 platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2010 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/irq.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/serial_reg.h> | ||
16 | #include <linux/serial_8250.h> | ||
17 | |||
18 | #include <asm/setup.h> | ||
19 | #include <asm/io.h> | ||
20 | |||
21 | static int ce4100_i8042_detect(void) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static void __init sdv_find_smp_config(void) | ||
27 | { | ||
28 | } | ||
29 | |||
30 | #ifdef CONFIG_SERIAL_8250 | ||
31 | |||
32 | |||
33 | static unsigned int mem_serial_in(struct uart_port *p, int offset) | ||
34 | { | ||
35 | offset = offset << p->regshift; | ||
36 | return readl(p->membase + offset); | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * The UART Tx interrupts are not set under some conditions and therefore serial | ||
41 | * transmission hangs. This is a silicon issue and has not been root caused. The | ||
42 | * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT | ||
43 | * bit of LSR register in interrupt handler to see whether at least one of these | ||
44 | * two bits is set, if so then process the transmit request. If this workaround | ||
45 | * is not applied, then the serial transmission may hang. This workaround is for | ||
46 | * errata number 9 in Errata - B step. | ||
47 | */ | ||
48 | |||
49 | static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset) | ||
50 | { | ||
51 | unsigned int ret, ier, lsr; | ||
52 | |||
53 | if (offset == UART_IIR) { | ||
54 | offset = offset << p->regshift; | ||
55 | ret = readl(p->membase + offset); | ||
56 | if (ret & UART_IIR_NO_INT) { | ||
57 | /* see if the Tx interrupt really should have been set */ | ||
58 | ier = mem_serial_in(p, UART_IER); | ||
59 | /* see if the UART's XMIT interrupt is enabled */ | ||
60 | if (ier & UART_IER_THRI) { | ||
61 | lsr = mem_serial_in(p, UART_LSR); | ||
62 | /* now check to see if the UART should be | ||
63 | generating an interrupt (but isn't) */ | ||
64 | if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) | ||
65 | ret &= ~UART_IIR_NO_INT; | ||
66 | } | ||
67 | } | ||
68 | } else | ||
69 | ret = mem_serial_in(p, offset); | ||
70 | return ret; | ||
71 | } | ||
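The workaround keys off two line-status bits: THRE (transmit-holding register empty) and TEMT (transmitter empty). If either is set while the Tx interrupt is enabled, the IIR no-interrupt flag is cleared by hand so the lost interrupt still gets serviced. A standalone decode using the standard 8250 register values from serial_reg.h:

    #include <stdio.h>

    #define UART_LSR_THRE   0x20  /* transmit-holding register empty */
    #define UART_LSR_TEMT   0x40  /* transmitter empty */
    #define UART_IIR_NO_INT 0x01

    int main(void)
    {
            unsigned lsr = 0x60;  /* idle transmitter: both bits set */
            unsigned iir = UART_IIR_NO_INT;

            if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
                    iir &= ~UART_IIR_NO_INT;  /* fake the missing Tx interrupt */
            printf("iir = %#x\n", iir);       /* prints 0 */
            return 0;
    }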
72 | |||
73 | static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value) | ||
74 | { | ||
75 | offset = offset << p->regshift; | ||
76 | writel(value, p->membase + offset); | ||
77 | } | ||
78 | |||
79 | static void ce4100_serial_fixup(int port, struct uart_port *up, | ||
80 | unsigned short *capabilites) | ||
81 | { | ||
82 | #ifdef CONFIG_EARLY_PRINTK | ||
83 | /* | ||
84 | * Override the legacy port configuration that comes from | ||
85 | * asm/serial.h. Using the ioport driver and then switching to the | ||
86 | * PCI memory-mapped driver hangs the IOAPIC. | ||
87 | */ | ||
88 | if (up->iotype != UPIO_MEM32) { | ||
89 | up->uartclk = 14745600; | ||
90 | up->mapbase = 0xdffe0200; | ||
91 | set_fixmap_nocache(FIX_EARLYCON_MEM_BASE, | ||
92 | up->mapbase & PAGE_MASK); | ||
93 | up->membase = | ||
94 | (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); | ||
95 | up->membase += up->mapbase & ~PAGE_MASK; | ||
96 | up->iotype = UPIO_MEM32; | ||
97 | up->regshift = 2; | ||
98 | } | ||
99 | #endif | ||
100 | up->iobase = 0; | ||
101 | up->serial_in = ce4100_mem_serial_in; | ||
102 | up->serial_out = ce4100_mem_serial_out; | ||
103 | |||
104 | *capabilities |= (1 << 12); | ||
105 | } | ||
106 | |||
107 | static __init void sdv_serial_fixup(void) | ||
108 | { | ||
109 | serial8250_set_isa_configurator(ce4100_serial_fixup); | ||
110 | } | ||
111 | |||
112 | #else | ||
113 | static inline void sdv_serial_fixup(void) {} | ||
114 | #endif | ||
115 | |||
116 | static void __init sdv_arch_setup(void) | ||
117 | { | ||
118 | sdv_serial_fixup(); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * CE4100 specific x86_init function overrides and early setup | ||
123 | * calls. | ||
124 | */ | ||
125 | void __init x86_ce4100_early_setup(void) | ||
126 | { | ||
127 | x86_init.oem.arch_setup = sdv_arch_setup; | ||
128 | x86_platform.i8042_detect = ce4100_i8042_detect; | ||
129 | x86_init.resources.probe_roms = x86_init_noop; | ||
130 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | ||
131 | x86_init.mpparse.find_smp_config = sdv_find_smp_config; | ||
132 | } | ||
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile new file mode 100644 index 000000000000..db921983a102 --- /dev/null +++ b/arch/x86/platform/iris/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_X86_32_IRIS) += iris.o | |||
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c new file mode 100644 index 000000000000..1ba7f5ed8c9b --- /dev/null +++ b/arch/x86/platform/iris/iris.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * Eurobraille/Iris power off support. | ||
3 | * | ||
4 | * Eurobraille's Iris machine is a PC with no APM or ACPI support. | ||
5 | * It is shut down by a special I/O sequence which this module provides. | ||
6 | * | ||
7 | * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org> | ||
8 | * | ||
9 | * This program is free software ; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation ; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with the program ; if not, write to the Free Software | ||
21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | */ | ||
23 | |||
24 | #include <linux/moduleparam.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/delay.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/pm.h> | ||
31 | #include <asm/io.h> | ||
32 | |||
33 | #define IRIS_GIO_BASE 0x340 | ||
34 | #define IRIS_GIO_INPUT IRIS_GIO_BASE | ||
35 | #define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1) | ||
36 | #define IRIS_GIO_PULSE 0x80 /* First byte to send */ | ||
37 | #define IRIS_GIO_REST 0x00 /* Second byte to send */ | ||
38 | #define IRIS_GIO_NODEV 0xff /* Likely not an Iris */ | ||
39 | |||
40 | MODULE_LICENSE("GPL"); | ||
41 | MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); | ||
42 | MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); | ||
43 | MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); | ||
44 | |||
45 | static int force; | ||
46 | |||
47 | module_param(force, bool, 0); | ||
48 | MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); | ||
49 | |||
50 | static void (*old_pm_power_off)(void); | ||
51 | |||
52 | static void iris_power_off(void) | ||
53 | { | ||
54 | outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT); | ||
55 | msleep(850); | ||
56 | outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT); | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Before installing the power_off handler, try to make sure the OS is | ||
61 | * running on an Iris. Since Iris does not support DMI, this is done | ||
62 | * by reading its input port and seeing whether the read value is | ||
63 | * meaningful. | ||
64 | */ | ||
65 | static int iris_init(void) | ||
66 | { | ||
67 | unsigned char status; | ||
68 | if (force != 1) { | ||
69 | printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n"); | ||
70 | return -ENODEV; | ||
71 | } | ||
72 | status = inb(IRIS_GIO_INPUT); | ||
73 | if (status == IRIS_GIO_NODEV) { | ||
74 | printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n"); | ||
75 | return -ENODEV; | ||
76 | } | ||
77 | old_pm_power_off = pm_power_off; | ||
78 | pm_power_off = &iris_power_off; | ||
79 | printk(KERN_INFO "Iris power_off handler installed.\n"); | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static void iris_exit(void) | ||
85 | { | ||
86 | pm_power_off = old_pm_power_off; | ||
87 | printk(KERN_INFO "Iris power_off handler uninstalled.\n"); | ||
88 | } | ||
89 | |||
90 | module_init(iris_init); | ||
91 | module_exit(iris_exit); | ||
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index efbbc552fa95..f61ccdd49341 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile | |||
@@ -1 +1,3 @@ | |||
1 | obj-$(CONFIG_X86_MRST) += mrst.o | 1 | obj-$(CONFIG_X86_MRST) += mrst.o |
2 | obj-$(CONFIG_X86_MRST) += vrtc.o | ||
3 | obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o | ||
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 65df603622b2..65df603622b2 100644 --- a/arch/x86/kernel/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c | |||
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 79ae68154e87..fee0b4914e07 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -9,9 +9,19 @@ | |||
9 | * as published by the Free Software Foundation; version 2 | 9 | * as published by the Free Software Foundation; version 2 |
10 | * of the License. | 10 | * of the License. |
11 | */ | 11 | */ |
12 | |||
13 | #define pr_fmt(fmt) "mrst: " fmt | ||
14 | |||
12 | #include <linux/init.h> | 15 | #include <linux/init.h> |
13 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
14 | #include <linux/sfi.h> | 17 | #include <linux/sfi.h> |
18 | #include <linux/intel_pmic_gpio.h> | ||
19 | #include <linux/spi/spi.h> | ||
20 | #include <linux/i2c.h> | ||
21 | #include <linux/i2c/pca953x.h> | ||
22 | #include <linux/gpio_keys.h> | ||
23 | #include <linux/input.h> | ||
24 | #include <linux/platform_device.h> | ||
15 | #include <linux/irq.h> | 25 | #include <linux/irq.h> |
16 | #include <linux/module.h> | 26 | #include <linux/module.h> |
17 | 27 | ||
@@ -23,7 +33,9 @@ | |||
23 | #include <asm/mrst.h> | 33 | #include <asm/mrst.h> |
24 | #include <asm/io.h> | 34 | #include <asm/io.h> |
25 | #include <asm/i8259.h> | 35 | #include <asm/i8259.h> |
36 | #include <asm/intel_scu_ipc.h> | ||
26 | #include <asm/apb_timer.h> | 37 | #include <asm/apb_timer.h> |
38 | #include <asm/reboot.h> | ||
27 | 39 | ||
28 | /* | 40 | /* |
29 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, | 41 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, |
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) | |||
102 | memcpy(sfi_mtimer_array, pentry, totallen); | 114 | memcpy(sfi_mtimer_array, pentry, totallen); |
103 | } | 115 | } |
104 | 116 | ||
105 | printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); | 117 | pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); |
106 | pentry = sfi_mtimer_array; | 118 | pentry = sfi_mtimer_array; |
107 | for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { | 119 | for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { |
108 | printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," | 120 | pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," |
109 | " irq = %d\n", totallen, (u32)pentry->phys_addr, | 121 | " irq = %d\n", totallen, (u32)pentry->phys_addr, |
110 | pentry->freq_hz, pentry->irq); | 122 | pentry->freq_hz, pentry->irq); |
111 | if (!pentry->irq) | 123 | if (!pentry->irq) |
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) | |||
176 | memcpy(sfi_mrtc_array, pentry, totallen); | 188 | memcpy(sfi_mrtc_array, pentry, totallen); |
177 | } | 189 | } |
178 | 190 | ||
179 | printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); | 191 | pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); |
180 | pentry = sfi_mrtc_array; | 192 | pentry = sfi_mrtc_array; |
181 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { | 193 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { |
182 | printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", | 194 | pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", |
183 | totallen, (u32)pentry->phys_addr, pentry->irq); | 195 | totallen, (u32)pentry->phys_addr, pentry->irq); |
184 | mp_irq.type = MP_IOAPIC; | 196 | mp_irq.type = MP_IOAPIC; |
185 | mp_irq.irqtype = mp_INT; | 197 | mp_irq.irqtype = mp_INT; |
186 | mp_irq.irqflag = 0; | 198 | mp_irq.irqflag = 0xf; /* level trigger and active low */ |
187 | mp_irq.srcbus = 0; | 199 | mp_irq.srcbus = 0; |
188 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | 200 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ |
189 | mp_irq.dstapic = MP_APIC_ALL; | 201 | mp_irq.dstapic = MP_APIC_ALL; |
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void) | |||
209 | 221 | ||
210 | void __init mrst_time_init(void) | 222 | void __init mrst_time_init(void) |
211 | { | 223 | { |
224 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | ||
212 | switch (mrst_timer_options) { | 225 | switch (mrst_timer_options) { |
213 | case MRST_TIMER_APBT_ONLY: | 226 | case MRST_TIMER_APBT_ONLY: |
214 | break; | 227 | break; |
@@ -224,16 +237,10 @@ void __init mrst_time_init(void) | |||
224 | return; | 237 | return; |
225 | } | 238 | } |
226 | /* we need at least one APB timer */ | 239 | /* we need at least one APB timer */ |
227 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | ||
228 | pre_init_apic_IRQ0(); | 240 | pre_init_apic_IRQ0(); |
229 | apbt_time_init(); | 241 | apbt_time_init(); |
230 | } | 242 | } |
231 | 243 | ||
232 | void __init mrst_rtc_init(void) | ||
233 | { | ||
234 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | ||
235 | } | ||
236 | |||
237 | void __cpuinit mrst_arch_setup(void) | 244 | void __cpuinit mrst_arch_setup(void) |
238 | { | 245 | { |
239 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) | 246 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) |
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void) | |||
256 | return 0; | 263 | return 0; |
257 | } | 264 | } |
258 | 265 | ||
266 | /* Reboot and power off are handled by the SCU on a MID device */ | ||
267 | static void mrst_power_off(void) | ||
268 | { | ||
269 | intel_scu_ipc_simple_command(0xf1, 1); | ||
270 | } | ||
271 | |||
272 | static void mrst_reboot(void) | ||
273 | { | ||
274 | intel_scu_ipc_simple_command(0xf1, 0); | ||
275 | } | ||
276 | |||
259 | /* | 277 | /* |
260 | * Moorestown specific x86_init function overrides and early setup | 278 | * Moorestown specific x86_init function overrides and early setup |
261 | * calls. | 279 | * calls. |
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void) | |||
281 | 299 | ||
282 | legacy_pic = &null_legacy_pic; | 300 | legacy_pic = &null_legacy_pic; |
283 | 301 | ||
302 | /* Moorestown specific power_off/restart method */ | ||
303 | pm_power_off = mrst_power_off; | ||
304 | machine_ops.emergency_restart = mrst_reboot; | ||
305 | |||
284 | /* Avoid searching for BIOS MP tables */ | 306 | /* Avoid searching for BIOS MP tables */ |
285 | x86_init.mpparse.find_smp_config = x86_init_noop; | 307 | x86_init.mpparse.find_smp_config = x86_init_noop; |
286 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | 308 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg) | |||
309 | return 0; | 331 | return 0; |
310 | } | 332 | } |
311 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | 333 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); |
334 | |||
335 | /* | ||
336 | * Parse the GPIO table first, since the DEVS table will need it | ||
337 | * to map pin names to actual pins. | ||
338 | */ | ||
339 | static struct sfi_gpio_table_entry *gpio_table; | ||
340 | static int gpio_num_entry; | ||
341 | |||
342 | static int __init sfi_parse_gpio(struct sfi_table_header *table) | ||
343 | { | ||
344 | struct sfi_table_simple *sb; | ||
345 | struct sfi_gpio_table_entry *pentry; | ||
346 | int num, i; | ||
347 | |||
348 | if (gpio_table) | ||
349 | return 0; | ||
350 | sb = (struct sfi_table_simple *)table; | ||
351 | num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); | ||
352 | pentry = (struct sfi_gpio_table_entry *)sb->pentry; | ||
353 | |||
354 | gpio_table = (struct sfi_gpio_table_entry *) | ||
355 | kmalloc(num * sizeof(*pentry), GFP_KERNEL); | ||
356 | if (!gpio_table) | ||
357 | return -1; | ||
358 | memcpy(gpio_table, pentry, num * sizeof(*pentry)); | ||
359 | gpio_num_entry = num; | ||
360 | |||
361 | pr_debug("GPIO pin info:\n"); | ||
362 | for (i = 0; i < num; i++, pentry++) | ||
363 | pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," | ||
364 | " pin = %d\n", i, | ||
365 | pentry->controller_name, | ||
366 | pentry->pin_name, | ||
367 | pentry->pin_no); | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | static int get_gpio_by_name(const char *name) | ||
372 | { | ||
373 | struct sfi_gpio_table_entry *pentry = gpio_table; | ||
374 | int i; | ||
375 | |||
376 | if (!pentry) | ||
377 | return -1; | ||
378 | for (i = 0; i < gpio_num_entry; i++, pentry++) { | ||
379 | if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) | ||
380 | return pentry->pin_no; | ||
381 | } | ||
382 | return -1; | ||
383 | } | ||
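The platform-data helpers below all follow the same pattern: look a pin up by name in the parsed GPIO table, then bias it by MRST_IRQ_OFFSET (defined a few lines further down) to obtain the IRQ number. A standalone sketch of that translation; the table entry here is made up for illustration:

    #include <stdio.h>
    #include <string.h>

    #define MRST_IRQ_OFFSET 0x100

    struct gpio_entry { const char *pin_name; int pin_no; };

    static struct gpio_entry gpio_table[] = {
            { "max3111_int", 27 },   /* hypothetical pin assignment */
    };

    static int get_gpio_by_name(const char *name)
    {
            size_t i;

            for (i = 0; i < sizeof(gpio_table) / sizeof(gpio_table[0]); i++)
                    if (!strcmp(name, gpio_table[i].pin_name))
                            return gpio_table[i].pin_no;
            return -1;
    }

    int main(void)
    {
            int pin = get_gpio_by_name("max3111_int");

            if (pin != -1)
                    printf("irq = %d\n", pin + MRST_IRQ_OFFSET);
            return 0;
    }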
384 | |||
385 | /* | ||
386 | * This array defines the platform data for the devices that the IAFW | ||
387 | * exports through the SFI "DEVS" table; name and type are used to match | ||
388 | * a device to its platform data. | ||
389 | */ | ||
390 | struct devs_id { | ||
391 | char name[SFI_NAME_LEN + 1]; | ||
392 | u8 type; | ||
393 | u8 delay; | ||
394 | void *(*get_platform_data)(void *info); | ||
395 | }; | ||
396 | |||
397 | /* the offset for the mapping of global gpio pin to irq */ | ||
398 | #define MRST_IRQ_OFFSET 0x100 | ||
399 | |||
400 | static void __init *pmic_gpio_platform_data(void *info) | ||
401 | { | ||
402 | static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; | ||
403 | int gpio_base = get_gpio_by_name("pmic_gpio_base"); | ||
404 | |||
405 | if (gpio_base == -1) | ||
406 | gpio_base = 64; | ||
407 | pmic_gpio_pdata.gpio_base = gpio_base; | ||
408 | pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; | ||
409 | pmic_gpio_pdata.gpiointr = 0xffffeff8; | ||
410 | |||
411 | return &pmic_gpio_pdata; | ||
412 | } | ||
413 | |||
414 | static void __init *max3111_platform_data(void *info) | ||
415 | { | ||
416 | struct spi_board_info *spi_info = info; | ||
417 | int intr = get_gpio_by_name("max3111_int"); | ||
418 | |||
419 | if (intr == -1) | ||
420 | return NULL; | ||
421 | spi_info->irq = intr + MRST_IRQ_OFFSET; | ||
422 | return NULL; | ||
423 | } | ||
424 | |||
425 | /* we have multiple max7315s on the board ... */ | ||
426 | #define MAX7315_NUM 2 | ||
427 | static void __init *max7315_platform_data(void *info) | ||
428 | { | ||
429 | static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; | ||
430 | static int nr; | ||
431 | struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; | ||
432 | struct i2c_board_info *i2c_info = info; | ||
433 | int gpio_base, intr; | ||
434 | char base_pin_name[SFI_NAME_LEN + 1]; | ||
435 | char intr_pin_name[SFI_NAME_LEN + 1]; | ||
436 | |||
437 | if (nr == MAX7315_NUM) { | ||
438 | pr_err("too many max7315s, we only support %d\n", | ||
439 | MAX7315_NUM); | ||
440 | return NULL; | ||
441 | } | ||
442 | /* we have several max7315s on the board; we only need to load several | ||
443 | * instances of the same pca953x driver to cover them | ||
444 | */ | ||
445 | strcpy(i2c_info->type, "max7315"); | ||
446 | if (nr++) { | ||
447 | sprintf(base_pin_name, "max7315_%d_base", nr); | ||
448 | sprintf(intr_pin_name, "max7315_%d_int", nr); | ||
449 | } else { | ||
450 | strcpy(base_pin_name, "max7315_base"); | ||
451 | strcpy(intr_pin_name, "max7315_int"); | ||
452 | } | ||
453 | |||
454 | gpio_base = get_gpio_by_name(base_pin_name); | ||
455 | intr = get_gpio_by_name(intr_pin_name); | ||
456 | |||
457 | if (gpio_base == -1) | ||
458 | return NULL; | ||
459 | max7315->gpio_base = gpio_base; | ||
460 | if (intr != -1) { | ||
461 | i2c_info->irq = intr + MRST_IRQ_OFFSET; | ||
462 | max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; | ||
463 | } else { | ||
464 | i2c_info->irq = -1; | ||
465 | max7315->irq_base = -1; | ||
466 | } | ||
467 | return max7315; | ||
468 | } | ||
469 | |||
470 | static void __init *emc1403_platform_data(void *info) | ||
471 | { | ||
472 | static short intr2nd_pdata; | ||
473 | struct i2c_board_info *i2c_info = info; | ||
474 | int intr = get_gpio_by_name("thermal_int"); | ||
475 | int intr2nd = get_gpio_by_name("thermal_alert"); | ||
476 | |||
477 | if (intr == -1 || intr2nd == -1) | ||
478 | return NULL; | ||
479 | |||
480 | i2c_info->irq = intr + MRST_IRQ_OFFSET; | ||
481 | intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; | ||
482 | |||
483 | return &intr2nd_pdata; | ||
484 | } | ||
485 | |||
486 | static void __init *lis331dl_platform_data(void *info) | ||
487 | { | ||
488 | static short intr2nd_pdata; | ||
489 | struct i2c_board_info *i2c_info = info; | ||
490 | int intr = get_gpio_by_name("accel_int"); | ||
491 | int intr2nd = get_gpio_by_name("accel_2"); | ||
492 | |||
493 | if (intr == -1 || intr2nd == -1) | ||
494 | return NULL; | ||
495 | |||
496 | i2c_info->irq = intr + MRST_IRQ_OFFSET; | ||
497 | intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; | ||
498 | |||
499 | return &intr2nd_pdata; | ||
500 | } | ||
501 | |||
502 | static void __init *no_platform_data(void *info) | ||
503 | { | ||
504 | return NULL; | ||
505 | } | ||
506 | |||
507 | static const struct devs_id __initconst device_ids[] = { | ||
508 | {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, | ||
509 | {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, | ||
510 | {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, | ||
511 | {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, | ||
512 | {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, | ||
513 | {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, | ||
514 | {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, | ||
515 | {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, | ||
516 | {}, | ||
517 | }; | ||
518 | |||
519 | #define MAX_IPCDEVS 24 | ||
520 | static struct platform_device *ipc_devs[MAX_IPCDEVS]; | ||
521 | static int ipc_next_dev; | ||
522 | |||
523 | #define MAX_SCU_SPI 24 | ||
524 | static struct spi_board_info *spi_devs[MAX_SCU_SPI]; | ||
525 | static int spi_next_dev; | ||
526 | |||
527 | #define MAX_SCU_I2C 24 | ||
528 | static struct i2c_board_info *i2c_devs[MAX_SCU_I2C]; | ||
529 | static int i2c_bus[MAX_SCU_I2C]; | ||
530 | static int i2c_next_dev; | ||
531 | |||
532 | static void __init intel_scu_device_register(struct platform_device *pdev) | ||
533 | { | ||
534 | if (ipc_next_dev == MAX_IPCDEVS) | ||
535 | pr_err("too many SCU IPC devices"); | ||
536 | else | ||
537 | ipc_devs[ipc_next_dev++] = pdev; | ||
538 | } | ||
539 | |||
540 | static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) | ||
541 | { | ||
542 | struct spi_board_info *new_dev; | ||
543 | |||
544 | if (spi_next_dev == MAX_SCU_SPI) { | ||
545 | pr_err("too many SCU SPI devices"); | ||
546 | return; | ||
547 | } | ||
548 | |||
549 | new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); | ||
550 | if (!new_dev) { | ||
551 | pr_err("failed to alloc mem for delayed spi dev %s\n", | ||
552 | sdev->modalias); | ||
553 | return; | ||
554 | } | ||
555 | memcpy(new_dev, sdev, sizeof(*sdev)); | ||
556 | |||
557 | spi_devs[spi_next_dev++] = new_dev; | ||
558 | } | ||
559 | |||
560 | static void __init intel_scu_i2c_device_register(int bus, | ||
561 | struct i2c_board_info *idev) | ||
562 | { | ||
563 | struct i2c_board_info *new_dev; | ||
564 | |||
565 | if (i2c_next_dev == MAX_SCU_I2C) { | ||
566 | pr_err("too many SCU I2C devices"); | ||
567 | return; | ||
568 | } | ||
569 | |||
570 | new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); | ||
571 | if (!new_dev) { | ||
572 | pr_err("failed to alloc mem for delayed i2c dev %s\n", | ||
573 | idev->type); | ||
574 | return; | ||
575 | } | ||
576 | memcpy(new_dev, idev, sizeof(*idev)); | ||
577 | |||
578 | i2c_bus[i2c_next_dev] = bus; | ||
579 | i2c_devs[i2c_next_dev++] = new_dev; | ||
580 | } | ||
581 | |||
582 | /* Called by IPC driver */ | ||
583 | void intel_scu_devices_create(void) | ||
584 | { | ||
585 | int i; | ||
586 | |||
587 | for (i = 0; i < ipc_next_dev; i++) | ||
588 | platform_device_add(ipc_devs[i]); | ||
589 | |||
590 | for (i = 0; i < spi_next_dev; i++) | ||
591 | spi_register_board_info(spi_devs[i], 1); | ||
592 | |||
593 | for (i = 0; i < i2c_next_dev; i++) { | ||
594 | struct i2c_adapter *adapter; | ||
595 | struct i2c_client *client; | ||
596 | |||
597 | adapter = i2c_get_adapter(i2c_bus[i]); | ||
598 | if (adapter) { | ||
599 | client = i2c_new_device(adapter, i2c_devs[i]); | ||
600 | if (!client) | ||
601 | pr_err("can't create i2c device %s\n", | ||
602 | i2c_devs[i]->type); | ||
603 | } else | ||
604 | i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); | ||
605 | } | ||
606 | } | ||
607 | EXPORT_SYMBOL_GPL(intel_scu_devices_create); | ||
608 | |||
609 | /* Called by IPC driver */ | ||
610 | void intel_scu_devices_destroy(void) | ||
611 | { | ||
612 | int i; | ||
613 | |||
614 | for (i = 0; i < ipc_next_dev; i++) | ||
615 | platform_device_del(ipc_devs[i]); | ||
616 | } | ||
617 | EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); | ||
618 | |||
619 | static void __init install_irq_resource(struct platform_device *pdev, int irq) | ||
620 | { | ||
621 | /* Single threaded */ | ||
622 | static struct resource __initdata res = { | ||
623 | .name = "IRQ", | ||
624 | .flags = IORESOURCE_IRQ, | ||
625 | }; | ||
626 | res.start = irq; | ||
627 | platform_device_add_resources(pdev, &res, 1); | ||
628 | } | ||
629 | |||
630 | static void __init sfi_handle_ipc_dev(struct platform_device *pdev) | ||
631 | { | ||
632 | const struct devs_id *dev = device_ids; | ||
633 | void *pdata = NULL; | ||
634 | |||
635 | while (dev->name[0]) { | ||
636 | if (dev->type == SFI_DEV_TYPE_IPC && | ||
637 | !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) { | ||
638 | pdata = dev->get_platform_data(pdev); | ||
639 | break; | ||
640 | } | ||
641 | dev++; | ||
642 | } | ||
643 | pdev->dev.platform_data = pdata; | ||
644 | intel_scu_device_register(pdev); | ||
645 | } | ||
646 | |||
647 | static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) | ||
648 | { | ||
649 | const struct devs_id *dev = device_ids; | ||
650 | void *pdata = NULL; | ||
651 | |||
652 | while (dev->name[0]) { | ||
653 | if (dev->type == SFI_DEV_TYPE_SPI && | ||
654 | !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { | ||
655 | pdata = dev->get_platform_data(spi_info); | ||
656 | break; | ||
657 | } | ||
658 | dev++; | ||
659 | } | ||
660 | spi_info->platform_data = pdata; | ||
661 | if (dev->delay) | ||
662 | intel_scu_spi_device_register(spi_info); | ||
663 | else | ||
664 | spi_register_board_info(spi_info, 1); | ||
665 | } | ||
666 | |||
667 | static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) | ||
668 | { | ||
669 | const struct devs_id *dev = device_ids; | ||
670 | void *pdata = NULL; | ||
671 | |||
672 | while (dev->name[0]) { | ||
673 | if (dev->type == SFI_DEV_TYPE_I2C && | ||
674 | !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { | ||
675 | pdata = dev->get_platform_data(i2c_info); | ||
676 | break; | ||
677 | } | ||
678 | dev++; | ||
679 | } | ||
680 | i2c_info->platform_data = pdata; | ||
681 | |||
682 | if (dev->delay) | ||
683 | intel_scu_i2c_device_register(bus, i2c_info); | ||
684 | else | ||
685 | i2c_register_board_info(bus, i2c_info, 1); | ||
686 | } | ||
687 | |||
688 | |||
689 | static int __init sfi_parse_devs(struct sfi_table_header *table) | ||
690 | { | ||
691 | struct sfi_table_simple *sb; | ||
692 | struct sfi_device_table_entry *pentry; | ||
693 | struct spi_board_info spi_info; | ||
694 | struct i2c_board_info i2c_info; | ||
695 | struct platform_device *pdev; | ||
696 | int num, i, bus; | ||
697 | int ioapic; | ||
698 | struct io_apic_irq_attr irq_attr; | ||
699 | |||
700 | sb = (struct sfi_table_simple *)table; | ||
701 | num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); | ||
702 | pentry = (struct sfi_device_table_entry *)sb->pentry; | ||
703 | |||
704 | for (i = 0; i < num; i++, pentry++) { | ||
705 | if (pentry->irq != (u8)0xff) { /* native RTE case */ | ||
706 | /* these SPI2 devices are not exposed to the system as PCI | ||
707 | * devices, but they have separate RTE entries in the IOAPIC, | ||
708 | * so we have to enable them one by one here | ||
709 | */ | ||
710 | ioapic = mp_find_ioapic(pentry->irq); | ||
711 | irq_attr.ioapic = ioapic; | ||
712 | irq_attr.ioapic_pin = pentry->irq; | ||
713 | irq_attr.trigger = 1; | ||
714 | irq_attr.polarity = 1; | ||
715 | io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); | ||
716 | } | ||
717 | switch (pentry->type) { | ||
718 | case SFI_DEV_TYPE_IPC: | ||
719 | /* ID as IRQ is a hack that will go away */ | ||
720 | pdev = platform_device_alloc(pentry->name, pentry->irq); | ||
721 | if (pdev == NULL) { | ||
722 | pr_err("out of memory for SFI platform device '%s'.\n", | ||
723 | pentry->name); | ||
724 | continue; | ||
725 | } | ||
726 | install_irq_resource(pdev, pentry->irq); | ||
727 | pr_debug("info[%2d]: IPC bus, name = %16.16s, " | ||
728 | "irq = 0x%2x\n", i, pentry->name, pentry->irq); | ||
729 | sfi_handle_ipc_dev(pdev); | ||
730 | break; | ||
731 | case SFI_DEV_TYPE_SPI: | ||
732 | memset(&spi_info, 0, sizeof(spi_info)); | ||
733 | strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); | ||
734 | spi_info.irq = pentry->irq; | ||
735 | spi_info.bus_num = pentry->host_num; | ||
736 | spi_info.chip_select = pentry->addr; | ||
737 | spi_info.max_speed_hz = pentry->max_freq; | ||
738 | pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " | ||
739 | "irq = 0x%2x, max_freq = %d, cs = %d\n", i, | ||
740 | spi_info.bus_num, | ||
741 | spi_info.modalias, | ||
742 | spi_info.irq, | ||
743 | spi_info.max_speed_hz, | ||
744 | spi_info.chip_select); | ||
745 | sfi_handle_spi_dev(&spi_info); | ||
746 | break; | ||
747 | case SFI_DEV_TYPE_I2C: | ||
748 | memset(&i2c_info, 0, sizeof(i2c_info)); | ||
749 | bus = pentry->host_num; | ||
750 | strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); | ||
751 | i2c_info.irq = pentry->irq; | ||
752 | i2c_info.addr = pentry->addr; | ||
753 | pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " | ||
754 | "irq = 0x%2x, addr = 0x%x\n", i, bus, | ||
755 | i2c_info.type, | ||
756 | i2c_info.irq, | ||
757 | i2c_info.addr); | ||
758 | sfi_handle_i2c_dev(bus, &i2c_info); | ||
759 | break; | ||
760 | case SFI_DEV_TYPE_UART: | ||
761 | case SFI_DEV_TYPE_HSI: | ||
762 | default: | ||
763 | ; | ||
764 | } | ||
765 | } | ||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | static int __init mrst_platform_init(void) | ||
770 | { | ||
771 | sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); | ||
772 | sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); | ||
773 | return 0; | ||
774 | } | ||
775 | arch_initcall(mrst_platform_init); | ||
776 | |||
777 | /* | ||
778 | * we will search for these buttons in the SFI GPIO table (by name) | ||
779 | * and register them dynamically. Please add all possible | ||
780 | * buttons here; we will shrink the list if no GPIO is found. | ||
781 | */ | ||
782 | static struct gpio_keys_button gpio_button[] = { | ||
783 | {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, | ||
784 | {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, | ||
785 | {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, | ||
786 | {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, | ||
787 | {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, | ||
788 | {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, | ||
789 | {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, | ||
790 | {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, | ||
791 | {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, | ||
792 | {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, | ||
793 | }; | ||
794 | |||
795 | static struct gpio_keys_platform_data mrst_gpio_keys = { | ||
796 | .buttons = gpio_button, | ||
797 | .rep = 1, | ||
798 | .nbuttons = -1, /* will fill it after search */ | ||
799 | }; | ||
800 | |||
801 | static struct platform_device pb_device = { | ||
802 | .name = "gpio-keys", | ||
803 | .id = -1, | ||
804 | .dev = { | ||
805 | .platform_data = &mrst_gpio_keys, | ||
806 | }, | ||
807 | }; | ||
808 | |||
809 | /* | ||
810 | * Shrink out the non-existent buttons and register the gpio button | ||
811 | * device if any remain | ||
812 | */ | ||
813 | static int __init pb_keys_init(void) | ||
814 | { | ||
815 | struct gpio_keys_button *gb = gpio_button; | ||
816 | int i, num, good = 0; | ||
817 | |||
818 | num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); | ||
819 | for (i = 0; i < num; i++) { | ||
820 | gb[i].gpio = get_gpio_by_name(gb[i].desc); | ||
821 | if (gb[i].gpio == -1) | ||
822 | continue; | ||
823 | |||
824 | if (i != good) | ||
825 | gb[good] = gb[i]; | ||
826 | good++; | ||
827 | } | ||
828 | |||
829 | if (good) { | ||
830 | mrst_gpio_keys.nbuttons = good; | ||
831 | return platform_device_register(&pb_device); | ||
832 | } | ||
833 | return 0; | ||
834 | } | ||
835 | late_initcall(pb_keys_init); | ||
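pb_keys_init() above compacts the button array in place: the read index i scans every slot, the write index good tracks where the next survivor lands, and entries are copied down only when the two diverge. A generic sketch of the same compaction, with a hypothetical keep predicate standing in for get_gpio_by_name():

    #include <stddef.h>
    #include <stdio.h>

    /* In-place "keep if" compaction with the same shape as
     * pb_keys_init(): survivors are copied down only when the
     * read and write indices differ.
     */
    static size_t compact(int *arr, size_t n, int (*keep)(int))
    {
        size_t i, good = 0;

        for (i = 0; i < n; i++) {
            if (!keep(arr[i]))
                continue;
            if (i != good)
                arr[good] = arr[i];
            good++;
        }
        return good;  /* number of surviving entries */
    }

    static int positive(int v) { return v > 0; }

    int main(void)
    {
        int gpios[] = { 7, -1, 12, -1, 3 };  /* -1 = "no GPIO found" */
        size_t n = compact(gpios, 5, positive);

        printf("%zu buttons kept\n", n);     /* prints 3 */
        return 0;
    }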
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c new file mode 100644 index 000000000000..32cd7edd71a0 --- /dev/null +++ b/arch/x86/platform/mrst/vrtc.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * vrtc.c: Driver for virtual RTC device on Intel MID platform | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | * | ||
11 | * Note: | ||
12 | * VRTC is emulated by the system controller firmware; the real | ||
13 | * HW RTC is located in the PMIC device. The SCU FW shadows the | ||
14 | * PMIC RTC in a memory-mapped IO space that is visible to the | ||
15 | * host IA processor. | ||
16 | * | ||
17 | * This driver is based on RTC CMOS driver. | ||
18 | */ | ||
19 | |||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/sfi.h> | ||
23 | #include <linux/platform_device.h> | ||
24 | |||
25 | #include <asm/mrst.h> | ||
26 | #include <asm/mrst-vrtc.h> | ||
27 | #include <asm/time.h> | ||
28 | #include <asm/fixmap.h> | ||
29 | |||
30 | static unsigned char __iomem *vrtc_virt_base; | ||
31 | |||
32 | unsigned char vrtc_cmos_read(unsigned char reg) | ||
33 | { | ||
34 | unsigned char retval; | ||
35 | |||
36 | /* vRTC's registers range from 0x0 to 0xD */ | ||
37 | if (reg > 0xd || !vrtc_virt_base) | ||
38 | return 0xff; | ||
39 | |||
40 | lock_cmos_prefix(reg); | ||
41 | retval = __raw_readb(vrtc_virt_base + (reg << 2)); | ||
42 | lock_cmos_suffix(reg); | ||
43 | return retval; | ||
44 | } | ||
45 | EXPORT_SYMBOL_GPL(vrtc_cmos_read); | ||
46 | |||
47 | void vrtc_cmos_write(unsigned char val, unsigned char reg) | ||
48 | { | ||
49 | if (reg > 0xd || !vrtc_virt_base) | ||
50 | return; | ||
51 | |||
52 | lock_cmos_prefix(reg); | ||
53 | __raw_writeb(val, vrtc_virt_base + (reg << 2)); | ||
54 | lock_cmos_suffix(reg); | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(vrtc_cmos_write); | ||
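Note the (reg << 2) scaling in both accessors: the SCU firmware shadows each byte-wide CMOS register on a 32-bit stride. A quick standalone check of the resulting offsets for the valid register range:

    #include <stdio.h>

    /* Prints the byte offset of each shadowed register 0x0..0xD */
    int main(void)
    {
        unsigned int reg;

        for (reg = 0; reg <= 0xd; reg++)
            printf("reg 0x%x -> byte offset 0x%x\n", reg, reg << 2);
        return 0;
    }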
57 | |||
58 | unsigned long vrtc_get_time(void) | ||
59 | { | ||
60 | u8 sec, min, hour, mday, mon; | ||
61 | u32 year; | ||
62 | |||
63 | while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) | ||
64 | cpu_relax(); | ||
65 | |||
66 | sec = vrtc_cmos_read(RTC_SECONDS); | ||
67 | min = vrtc_cmos_read(RTC_MINUTES); | ||
68 | hour = vrtc_cmos_read(RTC_HOURS); | ||
69 | mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); | ||
70 | mon = vrtc_cmos_read(RTC_MONTH); | ||
71 | year = vrtc_cmos_read(RTC_YEAR); | ||
72 | |||
73 | /* vRTC YEAR reg contains the offset to 1960 */ | ||
74 | year += 1960; | ||
75 | |||
76 | printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " | ||
77 | "mon: %d year: %d\n", sec, min, hour, mday, mon, year); | ||
78 | |||
79 | return mktime(year, mon, mday, hour, min, sec); | ||
80 | } | ||
81 | |||
82 | /* Only care about the minutes and seconds */ | ||
83 | int vrtc_set_mmss(unsigned long nowtime) | ||
84 | { | ||
85 | int real_sec, real_min; | ||
86 | int vrtc_min; | ||
87 | |||
88 | vrtc_min = vrtc_cmos_read(RTC_MINUTES); | ||
89 | |||
90 | real_sec = nowtime % 60; | ||
91 | real_min = nowtime / 60; | ||
92 | if (((abs(real_min - vrtc_min) + 15)/30) & 1) | ||
93 | real_min += 30; | ||
94 | real_min %= 60; | ||
95 | |||
96 | vrtc_cmos_write(real_sec, RTC_SECONDS); | ||
97 | vrtc_cmos_write(real_min, RTC_MINUTES); | ||
98 | return 0; | ||
99 | } | ||
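The rounding above is the classic set_rtc_mmss() trick: real_min counts total minutes since the epoch, and when it differs from the RTC's minute by roughly an odd number of half hours (a :30 timezone offset), 30 is added so that rewriting only minutes and seconds preserves the RTC's skew. A small worked check with hypothetical inputs:

    #include <stdio.h>
    #include <stdlib.h>

    /* Mirrors the minute rounding in vrtc_set_mmss() */
    static int adjusted_minute(unsigned long nowtime, int rtc_min)
    {
        int real_min = nowtime / 60;

        if (((abs(real_min - rtc_min) + 15) / 30) & 1)
            real_min += 30;
        return real_min % 60;
    }

    int main(void)
    {
        /* 02:00:05 UTC with the RTC showing minute 30 (half-hour
         * local offset): the write keeps minute 30 instead of 0.
         */
        printf("%d\n", adjusted_minute(2 * 3600 + 5, 30)); /* 30 */
        return 0;
    }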
100 | |||
101 | void __init mrst_rtc_init(void) | ||
102 | { | ||
103 | unsigned long rtc_paddr; | ||
104 | void __iomem *virt_base; | ||
105 | |||
106 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | ||
107 | if (!sfi_mrtc_num) | ||
108 | return; | ||
109 | |||
110 | rtc_paddr = sfi_mrtc_array[0].phys_addr; | ||
111 | |||
112 | /* vRTC's register address may not be page aligned */ | ||
113 | set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr); | ||
114 | |||
115 | virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC); | ||
116 | virt_base += rtc_paddr & ~PAGE_MASK; | ||
117 | vrtc_virt_base = virt_base; | ||
118 | |||
119 | x86_platform.get_wallclock = vrtc_get_time; | ||
120 | x86_platform.set_wallclock = vrtc_set_mmss; | ||
121 | } | ||
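Because the vRTC register block may not be page aligned, mrst_rtc_init() maps the containing page through a fixmap slot and then adds the sub-page offset back. A sketch of that arithmetic with hypothetical addresses and an assumed 4 KiB page size:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        uintptr_t rtc_paddr   = 0xff103f20; /* hypothetical, unaligned */
        uintptr_t fixmap_virt = 0xffffd000; /* hypothetical page-aligned slot */

        /* The fixmap maps the page containing rtc_paddr, so the low
         * bits of the physical address are added back to reach the
         * registers themselves.
         */
        uintptr_t vrtc_virt = fixmap_virt + (rtc_paddr & ~PAGE_MASK);

        printf("vrtc_virt_base = 0x%lx\n",
               (unsigned long)vrtc_virt);   /* 0xffffdf20 */
        return 0;
    }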
122 | |||
123 | /* | ||
124 | * The Moorestown platform has a memory-mapped virtual RTC device that | ||
125 | * emulates the programming interface of the RTC. | ||
126 | */ | ||
127 | |||
128 | static struct resource vrtc_resources[] = { | ||
129 | [0] = { | ||
130 | .flags = IORESOURCE_MEM, | ||
131 | }, | ||
132 | [1] = { | ||
133 | .flags = IORESOURCE_IRQ, | ||
134 | } | ||
135 | }; | ||
136 | |||
137 | static struct platform_device vrtc_device = { | ||
138 | .name = "rtc_mrst", | ||
139 | .id = -1, | ||
140 | .resource = vrtc_resources, | ||
141 | .num_resources = ARRAY_SIZE(vrtc_resources), | ||
142 | }; | ||
143 | |||
144 | /* Register the RTC device if appropriate */ | ||
145 | static int __init mrst_device_create(void) | ||
146 | { | ||
147 | /* No Moorestown, no device */ | ||
148 | if (!mrst_identify_cpu()) | ||
149 | return -ENODEV; | ||
150 | /* No timer, no device */ | ||
151 | if (!sfi_mrtc_num) | ||
152 | return -ENODEV; | ||
153 | |||
154 | /* iomem resource */ | ||
155 | vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr; | ||
156 | vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr + | ||
157 | MRST_VRTC_MAP_SZ; | ||
158 | /* irq resource */ | ||
159 | vrtc_resources[1].start = sfi_mrtc_array[0].irq; | ||
160 | vrtc_resources[1].end = sfi_mrtc_array[0].irq; | ||
161 | |||
162 | return platform_device_register(&vrtc_device); | ||
163 | } | ||
164 | |||
165 | module_init(mrst_device_create); | ||
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index dd4c281ffe57..ca54875ac795 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c | |||
@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address) | |||
48 | /* All CPUs enumerated by SFI must be present and enabled */ | 48 | /* All CPUs enumerated by SFI must be present and enabled */ |
49 | static void __cpuinit mp_sfi_register_lapic(u8 id) | 49 | static void __cpuinit mp_sfi_register_lapic(u8 id) |
50 | { | 50 | { |
51 | if (MAX_APICS - id <= 0) { | 51 | if (MAX_LOCAL_APIC - id <= 0) { |
52 | pr_warning("Processor #%d invalid (max %d)\n", | 52 | pr_warning("Processor #%d invalid (max %d)\n", |
53 | id, MAX_APICS); | 53 | id, MAX_LOCAL_APIC); |
54 | return; | 54 | return; |
55 | } | 55 | } |
56 | 56 | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index ba9caa808a9c..df58e9cad96a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1341 | 1341 | ||
1342 | /* | 1342 | /* |
1343 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | 1343 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) |
1344 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub | 1344 | * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) |
1345 | */ | 1345 | */ |
1346 | bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE | 1346 | bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE |
1347 | * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | 1347 | * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); |
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void) | |||
1490 | /* | 1490 | /* |
1491 | * initialize the bau_control structure for each cpu | 1491 | * initialize the bau_control structure for each cpu |
1492 | */ | 1492 | */ |
1493 | static void __init uv_init_per_cpu(int nuvhubs) | 1493 | static int __init uv_init_per_cpu(int nuvhubs) |
1494 | { | 1494 | { |
1495 | int i; | 1495 | int i; |
1496 | int cpu; | 1496 | int cpu; |
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1507 | struct bau_control *smaster = NULL; | 1507 | struct bau_control *smaster = NULL; |
1508 | struct socket_desc { | 1508 | struct socket_desc { |
1509 | short num_cpus; | 1509 | short num_cpus; |
1510 | short cpu_number[16]; | 1510 | short cpu_number[MAX_CPUS_PER_SOCKET]; |
1511 | }; | 1511 | }; |
1512 | struct uvhub_desc { | 1512 | struct uvhub_desc { |
1513 | unsigned short socket_mask; | 1513 | unsigned short socket_mask; |
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1540 | sdp = &bdp->socket[socket]; | 1540 | sdp = &bdp->socket[socket]; |
1541 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1541 | sdp->cpu_number[sdp->num_cpus] = cpu; |
1542 | sdp->num_cpus++; | 1542 | sdp->num_cpus++; |
1543 | if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { | ||
1544 | printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); | ||
1545 | return 1; | ||
1546 | } | ||
1543 | } | 1547 | } |
1544 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | 1548 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1545 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) | 1549 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) |
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1570 | bcp->uvhub_master = hmaster; | 1574 | bcp->uvhub_master = hmaster; |
1571 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> | 1575 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> |
1572 | blade_processor_id; | 1576 | blade_processor_id; |
1577 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { | ||
1578 | printk(KERN_EMERG | ||
1579 | "%d cpus per uvhub invalid\n", | ||
1580 | bcp->uvhub_cpu); | ||
1581 | return 1; | ||
1582 | } | ||
1573 | } | 1583 | } |
1574 | nextsocket: | 1584 | nextsocket: |
1575 | socket++; | 1585 | socket++; |
@@ -1595,6 +1605,7 @@ nextsocket: | |||
1595 | bcp->congested_reps = congested_reps; | 1605 | bcp->congested_reps = congested_reps; |
1596 | bcp->congested_period = congested_period; | 1606 | bcp->congested_period = congested_period; |
1597 | } | 1607 | } |
1608 | return 0; | ||
1598 | } | 1609 | } |
1599 | 1610 | ||
1600 | /* | 1611 | /* |
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void) | |||
1625 | spin_lock_init(&disable_lock); | 1636 | spin_lock_init(&disable_lock); |
1626 | congested_cycles = microsec_2_cycles(congested_response_us); | 1637 | congested_cycles = microsec_2_cycles(congested_response_us); |
1627 | 1638 | ||
1628 | uv_init_per_cpu(nuvhubs); | 1639 | if (uv_init_per_cpu(nuvhubs)) { |
1640 | nobau = 1; | ||
1641 | return 0; | ||
1642 | } | ||
1629 | 1643 | ||
1630 | uv_partition_base_pnode = 0x7fffffff; | 1644 | uv_partition_base_pnode = 0x7fffffff; |
1631 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | 1645 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) |
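The shape of this tlb_uv change is a graceful-degradation pattern: uv_init_per_cpu() now returns nonzero on impossible topology instead of indexing past fixed-size arrays, and uv_bau_init() responds by setting nobau rather than proceeding. A minimal standalone sketch of the pattern, with hypothetical names:

    #include <stdio.h>

    #define MAX_CPUS_PER_SOCKET 16

    /* Per-cpu init reports failure on out-of-range topology instead
     * of silently overrunning its arrays.
     */
    static int init_per_cpu(int cpus_per_socket)
    {
        if (cpus_per_socket > MAX_CPUS_PER_SOCKET) {
            fprintf(stderr, "%d cpus per socket invalid\n",
                    cpus_per_socket);
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        int nobau = 0;

        if (init_per_cpu(24))
            nobau = 1;              /* fall back: feature disabled */
        printf("nobau=%d\n", nobau);
        return 0;
    }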
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c index 3371bd053b89..632037671746 100644 --- a/arch/x86/platform/visws/visws_quirks.c +++ b/arch/x86/platform/visws/visws_quirks.c | |||
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
171 | ver = m->apicver; | 171 | ver = m->apicver; |
172 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { | 172 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { |
173 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | 173 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
174 | m->apicid, MAX_APICS); | 174 | m->apicid, MAX_LOCAL_APIC); |
175 | return; | 175 | return; |
176 | } | 176 | } |
177 | 177 | ||
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 5718566e00f9..d9926afec110 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c | |||
@@ -275,13 +275,23 @@ acpi_table_parse_srat(enum acpi_srat_type id, | |||
275 | int __init acpi_numa_init(void) | 275 | int __init acpi_numa_init(void) |
276 | { | 276 | { |
277 | int ret = 0; | 277 | int ret = 0; |
278 | int nr_cpu_entries = nr_cpu_ids; | ||
279 | |||
280 | #ifdef CONFIG_X86 | ||
281 | /* | ||
282 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | ||
283 | * SRAT cpu entries could have different order with that in MADT. | ||
284 | * So go over all cpu entries in SRAT to get apicid to node mapping. | ||
285 | */ | ||
286 | nr_cpu_entries = MAX_LOCAL_APIC; | ||
287 | #endif | ||
278 | 288 | ||
279 | /* SRAT: Static Resource Affinity Table */ | 289 | /* SRAT: Static Resource Affinity Table */ |
280 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { | 290 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { |
281 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, | 291 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, |
282 | acpi_parse_x2apic_affinity, nr_cpu_ids); | 292 | acpi_parse_x2apic_affinity, nr_cpu_entries); |
283 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, | 293 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, |
284 | acpi_parse_processor_affinity, nr_cpu_ids); | 294 | acpi_parse_processor_affinity, nr_cpu_entries); |
285 | ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, | 295 | ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, |
286 | acpi_parse_memory_affinity, | 296 | acpi_parse_memory_affinity, |
287 | NR_NODE_MEMBLKS); | 297 | NR_NODE_MEMBLKS); |
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 42396df55556..9252e85706ef 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c | |||
@@ -38,7 +38,7 @@ static int agp_bridges_found; | |||
38 | 38 | ||
39 | static void amd64_tlbflush(struct agp_memory *temp) | 39 | static void amd64_tlbflush(struct agp_memory *temp) |
40 | { | 40 | { |
41 | k8_flush_garts(); | 41 | amd_flush_garts(); |
42 | } | 42 | } |
43 | 43 | ||
44 | static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) | 44 | static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) |
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void) | |||
124 | u32 temp; | 124 | u32 temp; |
125 | struct aper_size_info_32 *values; | 125 | struct aper_size_info_32 *values; |
126 | 126 | ||
127 | dev = k8_northbridges.nb_misc[0]; | 127 | dev = node_to_amd_nb(0)->misc; |
128 | if (dev==NULL) | 128 | if (dev==NULL) |
129 | return 0; | 129 | return 0; |
130 | 130 | ||
@@ -181,16 +181,15 @@ static int amd_8151_configure(void) | |||
181 | unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); | 181 | unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); |
182 | int i; | 182 | int i; |
183 | 183 | ||
184 | if (!k8_northbridges.gart_supported) | 184 | if (!amd_nb_has_feature(AMD_NB_GART)) |
185 | return 0; | 185 | return 0; |
186 | 186 | ||
187 | /* Configure AGP regs in each x86-64 host bridge. */ | 187 | /* Configure AGP regs in each x86-64 host bridge. */ |
188 | for (i = 0; i < k8_northbridges.num; i++) { | 188 | for (i = 0; i < amd_nb_num(); i++) { |
189 | agp_bridge->gart_bus_addr = | 189 | agp_bridge->gart_bus_addr = |
190 | amd64_configure(k8_northbridges.nb_misc[i], | 190 | amd64_configure(node_to_amd_nb(i)->misc, gatt_bus); |
191 | gatt_bus); | ||
192 | } | 191 | } |
193 | k8_flush_garts(); | 192 | amd_flush_garts(); |
194 | return 0; | 193 | return 0; |
195 | } | 194 | } |
196 | 195 | ||
@@ -200,11 +199,11 @@ static void amd64_cleanup(void) | |||
200 | u32 tmp; | 199 | u32 tmp; |
201 | int i; | 200 | int i; |
202 | 201 | ||
203 | if (!k8_northbridges.gart_supported) | 202 | if (!amd_nb_has_feature(AMD_NB_GART)) |
204 | return; | 203 | return; |
205 | 204 | ||
206 | for (i = 0; i < k8_northbridges.num; i++) { | 205 | for (i = 0; i < amd_nb_num(); i++) { |
207 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | 206 | struct pci_dev *dev = node_to_amd_nb(i)->misc; |
208 | /* disable gart translation */ | 207 | /* disable gart translation */ |
209 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); | 208 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); |
210 | tmp &= ~GARTEN; | 209 | tmp &= ~GARTEN; |
@@ -331,15 +330,15 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr) | |||
331 | { | 330 | { |
332 | int i; | 331 | int i; |
333 | 332 | ||
334 | if (cache_k8_northbridges() < 0) | 333 | if (amd_cache_northbridges() < 0) |
335 | return -ENODEV; | 334 | return -ENODEV; |
336 | 335 | ||
337 | if (!k8_northbridges.gart_supported) | 336 | if (!amd_nb_has_feature(AMD_NB_GART)) |
338 | return -ENODEV; | 337 | return -ENODEV; |
339 | 338 | ||
340 | i = 0; | 339 | i = 0; |
341 | for (i = 0; i < k8_northbridges.num; i++) { | 340 | for (i = 0; i < amd_nb_num(); i++) { |
342 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | 341 | struct pci_dev *dev = node_to_amd_nb(i)->misc; |
343 | if (fix_northbridge(dev, pdev, cap_ptr) < 0) { | 342 | if (fix_northbridge(dev, pdev, cap_ptr) < 0) { |
344 | dev_err(&dev->dev, "no usable aperture found\n"); | 343 | dev_err(&dev->dev, "no usable aperture found\n"); |
345 | #ifdef __x86_64__ | 344 | #ifdef __x86_64__ |
@@ -416,7 +415,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) | |||
416 | } | 415 | } |
417 | 416 | ||
418 | /* shadow x86-64 registers into ULi registers */ | 417 | /* shadow x86-64 registers into ULi registers */ |
419 | pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, | 418 | pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE, |
420 | &httfea); | 419 | &httfea); |
421 | 420 | ||
422 | /* if x86-64 aperture base is beyond 4G, exit here */ | 421 | /* if x86-64 aperture base is beyond 4G, exit here */ |
@@ -484,7 +483,7 @@ static int nforce3_agp_init(struct pci_dev *pdev) | |||
484 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); | 483 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); |
485 | 484 | ||
486 | /* shadow x86-64 registers into NVIDIA registers */ | 485 | /* shadow x86-64 registers into NVIDIA registers */ |
487 | pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, | 486 | pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE, |
488 | &apbase); | 487 | &apbase); |
489 | 488 | ||
490 | /* if x86-64 aperture base is beyond 4G, exit here */ | 489 | /* if x86-64 aperture base is beyond 4G, exit here */ |
@@ -778,7 +777,7 @@ int __init agp_amd64_init(void) | |||
778 | } | 777 | } |
779 | 778 | ||
780 | /* First check that we have at least one AMD64 NB */ | 779 | /* First check that we have at least one AMD64 NB */ |
781 | if (!pci_dev_present(k8_nb_ids)) | 780 | if (!pci_dev_present(amd_nb_misc_ids)) |
782 | return -ENODEV; | 781 | return -ENODEV; |
783 | 782 | ||
784 | /* Look for any AGP bridge */ | 783 | /* Look for any AGP bridge */ |
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index eca9ba193e94..df211181fca4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void) | |||
2917 | 2917 | ||
2918 | opstate_init(); | 2918 | opstate_init(); |
2919 | 2919 | ||
2920 | if (cache_k8_northbridges() < 0) | 2920 | if (amd_cache_northbridges() < 0) |
2921 | goto err_ret; | 2921 | goto err_ret; |
2922 | 2922 | ||
2923 | msrs = msrs_alloc(); | 2923 | msrs = msrs_alloc(); |
@@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void) | |||
2934 | * to finish initialization of the MC instances. | 2934 | * to finish initialization of the MC instances. |
2935 | */ | 2935 | */ |
2936 | err = -ENODEV; | 2936 | err = -ENODEV; |
2937 | for (nb = 0; nb < k8_northbridges.num; nb++) { | 2937 | for (nb = 0; nb < amd_nb_num(); nb++) { |
2938 | if (!pvt_lookup[nb]) | 2938 | if (!pvt_lookup[nb]) |
2939 | continue; | 2939 | continue; |
2940 | 2940 | ||
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 41a9e34899ac..ca35b0ce944a 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sfi.h> | 26 | #include <linux/sfi.h> |
27 | #include <asm/mrst.h> | 27 | #include <asm/mrst.h> |
28 | #include <asm/intel_scu_ipc.h> | 28 | #include <asm/intel_scu_ipc.h> |
29 | #include <asm/mrst.h> | ||
29 | 30 | ||
30 | /* IPC defines the following message types */ | 31 | /* IPC defines the following message types */ |
31 | #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ | 32 | #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ |
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id) | |||
699 | iounmap(ipcdev.ipc_base); | 700 | iounmap(ipcdev.ipc_base); |
700 | return -ENOMEM; | 701 | return -ENOMEM; |
701 | } | 702 | } |
703 | |||
704 | intel_scu_devices_create(); | ||
705 | |||
702 | return 0; | 706 | return 0; |
703 | } | 707 | } |
704 | 708 | ||
@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev) | |||
720 | iounmap(ipcdev.ipc_base); | 724 | iounmap(ipcdev.ipc_base); |
721 | iounmap(ipcdev.i2c_base); | 725 | iounmap(ipcdev.i2c_base); |
722 | ipcdev.pdev = NULL; | 726 | ipcdev.pdev = NULL; |
727 | intel_scu_devices_destroy(); | ||
723 | } | 728 | } |
724 | 729 | ||
725 | static const struct pci_device_id pci_ids[] = { | 730 | static const struct pci_device_id pci_ids[] = { |
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 2883428d5ac8..4941cade319f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig | |||
@@ -463,6 +463,18 @@ config RTC_DRV_CMOS | |||
463 | This driver can also be built as a module. If so, the module | 463 | This driver can also be built as a module. If so, the module |
464 | will be called rtc-cmos. | 464 | will be called rtc-cmos. |
465 | 465 | ||
466 | config RTC_DRV_VRTC | ||
467 | tristate "Virtual RTC for Moorestown platforms" | ||
468 | depends on X86_MRST | ||
469 | default y if X86_MRST | ||
470 | |||
471 | help | ||
472 | Say "yes" here to get direct support for the real time clock | ||
473 | found on Moorestown platforms. The VRTC is an emulated RTC that | ||
474 | derives its clock source from a real RTC in the PMIC. The | ||
475 | MC146818-style programming interface is mostly preserved, but any | ||
476 | updates are done via IPC calls to the system controller FW. | ||
477 | |||
466 | config RTC_DRV_DS1216 | 478 | config RTC_DRV_DS1216 |
467 | tristate "Dallas DS1216" | 479 | tristate "Dallas DS1216" |
468 | depends on SNI_RM | 480 | depends on SNI_RM |
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 4c2832df4697..2afdaf3ff986 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile | |||
@@ -30,6 +30,7 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o | |||
30 | obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o | 30 | obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o |
31 | obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o | 31 | obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o |
32 | obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o | 32 | obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o |
33 | obj-$(CONFIG_RTC_DRV_VRTC) += rtc-mrst.o | ||
33 | obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o | 34 | obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o |
34 | obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o | 35 | obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o |
35 | obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o | 36 | obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o |
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c new file mode 100644 index 000000000000..bcd0cf63eb16 --- /dev/null +++ b/drivers/rtc/rtc-mrst.c | |||
@@ -0,0 +1,582 @@ | |||
1 | /* | ||
2 | * rtc-mrst.c: Driver for Moorestown virtual RTC | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * Author: Jacob Pan (jacob.jun.pan@intel.com) | ||
6 | * Feng Tang (feng.tang@intel.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; version 2 | ||
11 | * of the License. | ||
12 | * | ||
13 | * Note: | ||
14 | * VRTC is emulated by the system controller firmware; the real | ||
15 | * HW RTC is located in the PMIC device. The SCU FW shadows the | ||
16 | * PMIC RTC in a memory-mapped IO space that is visible to the | ||
17 | * host IA processor. | ||
18 | * | ||
19 | * This driver is based upon drivers/rtc/rtc-cmos.c | ||
20 | */ | ||
21 | |||
22 | /* | ||
23 | * Note: | ||
24 | * * vRTC only supports binary mode and 24H mode | ||
25 | * * vRTC only supports PIE and AIE, no UIE; its PIE only fires | ||
26 | * at 23:59:59 every day, with no support for adjustable frequency | ||
27 | * * Alarm function is also limited to hr/min/sec. | ||
28 | */ | ||
29 | |||
30 | #include <linux/mod_devicetable.h> | ||
31 | #include <linux/platform_device.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/sfi.h> | ||
38 | |||
39 | #include <asm-generic/rtc.h> | ||
40 | #include <asm/intel_scu_ipc.h> | ||
41 | #include <asm/mrst.h> | ||
42 | #include <asm/mrst-vrtc.h> | ||
43 | |||
44 | struct mrst_rtc { | ||
45 | struct rtc_device *rtc; | ||
46 | struct device *dev; | ||
47 | int irq; | ||
48 | struct resource *iomem; | ||
49 | |||
50 | u8 enabled_wake; | ||
51 | u8 suspend_ctrl; | ||
52 | }; | ||
53 | |||
54 | static const char driver_name[] = "rtc_mrst"; | ||
55 | |||
56 | #define RTC_IRQMASK (RTC_PF | RTC_AF) | ||
57 | |||
58 | static inline int is_intr(u8 rtc_intr) | ||
59 | { | ||
60 | if (!(rtc_intr & RTC_IRQF)) | ||
61 | return 0; | ||
62 | return rtc_intr & RTC_IRQMASK; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * rtc_time's year contains the increment over 1900, but vRTC's YEAR | ||
67 | * register can't be programmed to a value larger than 0x64, so the | ||
68 | * vRTC driver chose to use 1960 (1970 is the UNIX time start point) | ||
69 | * as the base, and does the translation at read/write time. | ||
70 | * | ||
71 | * Why not just use 1970 as the offset? Because using 1960 keeps the | ||
72 | * leap-year setting consistent between the vRTC and the low-level | ||
73 | * physical RTC devices. | ||
74 | */ | ||
75 | static int mrst_read_time(struct device *dev, struct rtc_time *time) | ||
76 | { | ||
77 | unsigned long flags; | ||
78 | |||
79 | if (rtc_is_updating()) | ||
80 | mdelay(20); | ||
81 | |||
82 | spin_lock_irqsave(&rtc_lock, flags); | ||
83 | time->tm_sec = vrtc_cmos_read(RTC_SECONDS); | ||
84 | time->tm_min = vrtc_cmos_read(RTC_MINUTES); | ||
85 | time->tm_hour = vrtc_cmos_read(RTC_HOURS); | ||
86 | time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); | ||
87 | time->tm_mon = vrtc_cmos_read(RTC_MONTH); | ||
88 | time->tm_year = vrtc_cmos_read(RTC_YEAR); | ||
89 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
90 | |||
91 | /* Adjust for the 1960/1900 */ | ||
92 | time->tm_year += 60; | ||
93 | time->tm_mon--; | ||
94 | return RTC_24H; | ||
95 | } | ||
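A quick consistency check of the year translation described in the comment above; the 2011 value here is only an illustration:

    #include <assert.h>

    int main(void)
    {
        /* vRTC YEAR register holds (year - 1960); rtc_time.tm_year
         * holds (year - 1900); hence the +60 on read and -60 on write.
         */
        int year = 2011;
        int reg = year - 1960;      /* 51, below the 0x64 register limit */
        int tm_year = reg + 60;     /* 111 == 2011 - 1900 */

        assert(tm_year == year - 1900);
        return 0;
    }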
96 | |||
97 | static int mrst_set_time(struct device *dev, struct rtc_time *time) | ||
98 | { | ||
99 | int ret; | ||
100 | unsigned long flags; | ||
101 | unsigned char mon, day, hrs, min, sec; | ||
102 | unsigned int yrs; | ||
103 | |||
104 | yrs = time->tm_year; | ||
105 | mon = time->tm_mon + 1; /* tm_mon starts at zero */ | ||
106 | day = time->tm_mday; | ||
107 | hrs = time->tm_hour; | ||
108 | min = time->tm_min; | ||
109 | sec = time->tm_sec; | ||
110 | |||
111 | if (yrs < 70 || yrs > 138) | ||
112 | return -EINVAL; | ||
113 | yrs -= 60; | ||
114 | |||
115 | spin_lock_irqsave(&rtc_lock, flags); | ||
116 | |||
117 | vrtc_cmos_write(yrs, RTC_YEAR); | ||
118 | vrtc_cmos_write(mon, RTC_MONTH); | ||
119 | vrtc_cmos_write(day, RTC_DAY_OF_MONTH); | ||
120 | vrtc_cmos_write(hrs, RTC_HOURS); | ||
121 | vrtc_cmos_write(min, RTC_MINUTES); | ||
122 | vrtc_cmos_write(sec, RTC_SECONDS); | ||
123 | |||
124 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
125 | |||
126 | ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t) | ||
131 | { | ||
132 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
133 | unsigned char rtc_control; | ||
134 | |||
135 | if (mrst->irq <= 0) | ||
136 | return -EIO; | ||
137 | |||
138 | /* Basic alarms only support the hour, minute, and second fields. | ||
139 | * Some also support day and month, for alarms up to a year in | ||
140 | * the future. | ||
141 | */ | ||
142 | t->time.tm_mday = -1; | ||
143 | t->time.tm_mon = -1; | ||
144 | t->time.tm_year = -1; | ||
145 | |||
146 | /* vRTC only supports binary mode */ | ||
147 | spin_lock_irq(&rtc_lock); | ||
148 | t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM); | ||
149 | t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM); | ||
150 | t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM); | ||
151 | |||
152 | rtc_control = vrtc_cmos_read(RTC_CONTROL); | ||
153 | spin_unlock_irq(&rtc_lock); | ||
154 | |||
155 | t->enabled = !!(rtc_control & RTC_AIE); | ||
156 | t->pending = 0; | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control) | ||
162 | { | ||
163 | unsigned char rtc_intr; | ||
164 | |||
165 | /* | ||
166 | * NOTE after changing RTC_xIE bits we always read INTR_FLAGS; | ||
167 | * allegedly some older rtcs need that to handle irqs properly | ||
168 | */ | ||
169 | rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS); | ||
170 | rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; | ||
171 | if (is_intr(rtc_intr)) | ||
172 | rtc_update_irq(mrst->rtc, 1, rtc_intr); | ||
173 | } | ||
174 | |||
175 | static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask) | ||
176 | { | ||
177 | unsigned char rtc_control; | ||
178 | |||
179 | /* | ||
180 | * Flush any pending IRQ status, notably for update irqs, | ||
181 | * before we enable new IRQs | ||
182 | */ | ||
183 | rtc_control = vrtc_cmos_read(RTC_CONTROL); | ||
184 | mrst_checkintr(mrst, rtc_control); | ||
185 | |||
186 | rtc_control |= mask; | ||
187 | vrtc_cmos_write(rtc_control, RTC_CONTROL); | ||
188 | |||
189 | mrst_checkintr(mrst, rtc_control); | ||
190 | } | ||
191 | |||
192 | static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask) | ||
193 | { | ||
194 | unsigned char rtc_control; | ||
195 | |||
196 | rtc_control = vrtc_cmos_read(RTC_CONTROL); | ||
197 | rtc_control &= ~mask; | ||
198 | vrtc_cmos_write(rtc_control, RTC_CONTROL); | ||
199 | mrst_checkintr(mrst, rtc_control); | ||
200 | } | ||
201 | |||
202 | static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t) | ||
203 | { | ||
204 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
205 | unsigned char hrs, min, sec; | ||
206 | int ret = 0; | ||
207 | |||
208 | if (!mrst->irq) | ||
209 | return -EIO; | ||
210 | |||
211 | hrs = t->time.tm_hour; | ||
212 | min = t->time.tm_min; | ||
213 | sec = t->time.tm_sec; | ||
214 | |||
215 | spin_lock_irq(&rtc_lock); | ||
216 | /* Next rtc irq must not be from previous alarm setting */ | ||
217 | mrst_irq_disable(mrst, RTC_AIE); | ||
218 | |||
219 | /* Update alarm */ | ||
220 | vrtc_cmos_write(hrs, RTC_HOURS_ALARM); | ||
221 | vrtc_cmos_write(min, RTC_MINUTES_ALARM); | ||
222 | vrtc_cmos_write(sec, RTC_SECONDS_ALARM); | ||
223 | |||
224 | spin_unlock_irq(&rtc_lock); | ||
225 | |||
226 | ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM); | ||
227 | if (ret) | ||
228 | return ret; | ||
229 | |||
230 | spin_lock_irq(&rtc_lock); | ||
231 | if (t->enabled) | ||
232 | mrst_irq_enable(mrst, RTC_AIE); | ||
233 | |||
234 | spin_unlock_irq(&rtc_lock); | ||
235 | |||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static int mrst_irq_set_state(struct device *dev, int enabled) | ||
240 | { | ||
241 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
242 | unsigned long flags; | ||
243 | |||
244 | if (!mrst->irq) | ||
245 | return -ENXIO; | ||
246 | |||
247 | spin_lock_irqsave(&rtc_lock, flags); | ||
248 | |||
249 | if (enabled) | ||
250 | mrst_irq_enable(mrst, RTC_PIE); | ||
251 | else | ||
252 | mrst_irq_disable(mrst, RTC_PIE); | ||
253 | |||
254 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | #if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE) | ||
259 | |||
260 | /* Currently, the vRTC doesn't support UIE ON/OFF */ | ||
261 | static int | ||
262 | mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) | ||
263 | { | ||
264 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
265 | unsigned long flags; | ||
266 | |||
267 | switch (cmd) { | ||
268 | case RTC_AIE_OFF: | ||
269 | case RTC_AIE_ON: | ||
270 | if (!mrst->irq) | ||
271 | return -EINVAL; | ||
272 | break; | ||
273 | default: | ||
274 | /* PIE ON/OFF is handled by mrst_irq_set_state() */ | ||
275 | return -ENOIOCTLCMD; | ||
276 | } | ||
277 | |||
278 | spin_lock_irqsave(&rtc_lock, flags); | ||
279 | switch (cmd) { | ||
280 | case RTC_AIE_OFF: /* alarm off */ | ||
281 | mrst_irq_disable(mrst, RTC_AIE); | ||
282 | break; | ||
283 | case RTC_AIE_ON: /* alarm on */ | ||
284 | mrst_irq_enable(mrst, RTC_AIE); | ||
285 | break; | ||
286 | } | ||
287 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | #else | ||
292 | #define mrst_rtc_ioctl NULL | ||
293 | #endif | ||
294 | |||
295 | #if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) | ||
296 | |||
297 | static int mrst_procfs(struct device *dev, struct seq_file *seq) | ||
298 | { | ||
299 | unsigned char rtc_control, valid; | ||
300 | |||
301 | spin_lock_irq(&rtc_lock); | ||
302 | rtc_control = vrtc_cmos_read(RTC_CONTROL); | ||
303 | valid = vrtc_cmos_read(RTC_VALID); | ||
304 | spin_unlock_irq(&rtc_lock); | ||
305 | |||
306 | return seq_printf(seq, | ||
307 | "periodic_IRQ\t: %s\n" | ||
308 | "alarm\t\t: %s\n" | ||
309 | "BCD\t\t: no\n" | ||
310 | "periodic_freq\t: daily (not adjustable)\n", | ||
311 | (rtc_control & RTC_PIE) ? "on" : "off", | ||
312 | (rtc_control & RTC_AIE) ? "on" : "off"); | ||
313 | } | ||
314 | |||
315 | #else | ||
316 | #define mrst_procfs NULL | ||
317 | #endif | ||
318 | |||
319 | static const struct rtc_class_ops mrst_rtc_ops = { | ||
320 | .ioctl = mrst_rtc_ioctl, | ||
321 | .read_time = mrst_read_time, | ||
322 | .set_time = mrst_set_time, | ||
323 | .read_alarm = mrst_read_alarm, | ||
324 | .set_alarm = mrst_set_alarm, | ||
325 | .proc = mrst_procfs, | ||
326 | .irq_set_state = mrst_irq_set_state, | ||
327 | }; | ||
328 | |||
329 | static struct mrst_rtc mrst_rtc; | ||
330 | |||
331 | /* | ||
332 | * When the vRTC IRQ is captured by the SCU FW, the FW clears the AIE | ||
333 | * bit in Reg B, so there is no need for this driver to clear it | ||
334 | */ | ||
335 | static irqreturn_t mrst_rtc_irq(int irq, void *p) | ||
336 | { | ||
337 | u8 irqstat; | ||
338 | |||
339 | spin_lock(&rtc_lock); | ||
340 | /* This read will clear all IRQ flags inside Reg C */ | ||
341 | irqstat = vrtc_cmos_read(RTC_INTR_FLAGS); | ||
342 | spin_unlock(&rtc_lock); | ||
343 | |||
344 | irqstat &= RTC_IRQMASK | RTC_IRQF; | ||
345 | if (is_intr(irqstat)) { | ||
346 | rtc_update_irq(p, 1, irqstat); | ||
347 | return IRQ_HANDLED; | ||
348 | } | ||
349 | return IRQ_NONE; | ||
350 | } | ||
351 | |||
352 | static int __init | ||
353 | vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq) | ||
354 | { | ||
355 | int retval = 0; | ||
356 | unsigned char rtc_control; | ||
357 | |||
358 | /* There can be only one ... */ | ||
359 | if (mrst_rtc.dev) | ||
360 | return -EBUSY; | ||
361 | |||
362 | if (!iomem) | ||
363 | return -ENODEV; | ||
364 | |||
365 | iomem = request_mem_region(iomem->start, | ||
366 | iomem->end + 1 - iomem->start, | ||
367 | driver_name); | ||
368 | if (!iomem) { | ||
369 | dev_dbg(dev, "i/o mem already in use.\n"); | ||
370 | return -EBUSY; | ||
371 | } | ||
372 | |||
373 | mrst_rtc.irq = rtc_irq; | ||
374 | mrst_rtc.iomem = iomem; | ||
375 | |||
376 | mrst_rtc.rtc = rtc_device_register(driver_name, dev, | ||
377 | &mrst_rtc_ops, THIS_MODULE); | ||
378 | if (IS_ERR(mrst_rtc.rtc)) { | ||
379 | retval = PTR_ERR(mrst_rtc.rtc); | ||
380 | goto cleanup0; | ||
381 | } | ||
382 | |||
383 | mrst_rtc.dev = dev; | ||
384 | dev_set_drvdata(dev, &mrst_rtc); | ||
385 | rename_region(iomem, dev_name(&mrst_rtc.rtc->dev)); | ||
386 | |||
387 | spin_lock_irq(&rtc_lock); | ||
388 | mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE); | ||
389 | rtc_control = vrtc_cmos_read(RTC_CONTROL); | ||
390 | spin_unlock_irq(&rtc_lock); | ||
391 | |||
392 | if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY))) | ||
393 | dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n"); | ||
394 | |||
395 | if (rtc_irq) { | ||
396 | retval = request_irq(rtc_irq, mrst_rtc_irq, | ||
397 | IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev), | ||
398 | mrst_rtc.rtc); | ||
399 | if (retval < 0) { | ||
400 | dev_dbg(dev, "IRQ %d is already in use, err %d\n", | ||
401 | rtc_irq, retval); | ||
402 | goto cleanup1; | ||
403 | } | ||
404 | } | ||
405 | dev_dbg(dev, "initialised\n"); | ||
406 | return 0; | ||
407 | |||
408 | cleanup1: | ||
409 | mrst_rtc.dev = NULL; | ||
410 | rtc_device_unregister(mrst_rtc.rtc); | ||
411 | cleanup0: | ||
412 | release_region(iomem->start, iomem->end + 1 - iomem->start); | ||
413 | dev_err(dev, "rtc-mrst: unable to initialise\n"); | ||
414 | return retval; | ||
415 | } | ||
416 | |||
417 | static void rtc_mrst_do_shutdown(void) | ||
418 | { | ||
419 | spin_lock_irq(&rtc_lock); | ||
420 | mrst_irq_disable(&mrst_rtc, RTC_IRQMASK); | ||
421 | spin_unlock_irq(&rtc_lock); | ||
422 | } | ||
423 | |||
424 | static void __exit rtc_mrst_do_remove(struct device *dev) | ||
425 | { | ||
426 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
427 | struct resource *iomem; | ||
428 | |||
429 | rtc_mrst_do_shutdown(); | ||
430 | |||
431 | if (mrst->irq) | ||
432 | free_irq(mrst->irq, mrst->rtc); | ||
433 | |||
434 | rtc_device_unregister(mrst->rtc); | ||
435 | mrst->rtc = NULL; | ||
436 | |||
437 | iomem = mrst->iomem; | ||
438 | release_region(iomem->start, iomem->end + 1 - iomem->start); | ||
439 | mrst->iomem = NULL; | ||
440 | |||
441 | mrst->dev = NULL; | ||
442 | dev_set_drvdata(dev, NULL); | ||
443 | } | ||
444 | |||
445 | #ifdef CONFIG_PM | ||
446 | static int mrst_suspend(struct device *dev, pm_message_t mesg) | ||
447 | { | ||
448 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
449 | unsigned char tmp; | ||
450 | |||
451 | /* Only the alarm might be a wakeup event source */ | ||
452 | spin_lock_irq(&rtc_lock); | ||
453 | mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL); | ||
454 | if (tmp & (RTC_PIE | RTC_AIE)) { | ||
455 | unsigned char mask; | ||
456 | |||
457 | if (device_may_wakeup(dev)) | ||
458 | mask = RTC_IRQMASK & ~RTC_AIE; | ||
459 | else | ||
460 | mask = RTC_IRQMASK; | ||
461 | tmp &= ~mask; | ||
462 | vrtc_cmos_write(tmp, RTC_CONTROL); | ||
463 | |||
464 | mrst_checkintr(mrst, tmp); | ||
465 | } | ||
466 | spin_unlock_irq(&rtc_lock); | ||
467 | |||
468 | if (tmp & RTC_AIE) { | ||
469 | mrst->enabled_wake = 1; | ||
470 | enable_irq_wake(mrst->irq); | ||
471 | } | ||
472 | |||
473 | dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n", | ||
474 | (tmp & RTC_AIE) ? ", alarm may wake" : "", | ||
475 | tmp); | ||
476 | |||
477 | return 0; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * We want RTC alarms to wake us from the deep power saving state | ||
482 | */ | ||
483 | static inline int mrst_poweroff(struct device *dev) | ||
484 | { | ||
485 | return mrst_suspend(dev, PMSG_HIBERNATE); | ||
486 | } | ||
487 | |||
488 | static int mrst_resume(struct device *dev) | ||
489 | { | ||
490 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | ||
491 | unsigned char tmp = mrst->suspend_ctrl; | ||
492 | |||
493 | /* Re-enable any irqs previously active */ | ||
494 | if (tmp & RTC_IRQMASK) { | ||
495 | unsigned char mask; | ||
496 | |||
497 | if (mrst->enabled_wake) { | ||
498 | disable_irq_wake(mrst->irq); | ||
499 | mrst->enabled_wake = 0; | ||
500 | } | ||
501 | |||
502 | spin_lock_irq(&rtc_lock); | ||
503 | do { | ||
504 | vrtc_cmos_write(tmp, RTC_CONTROL); | ||
505 | |||
506 | mask = vrtc_cmos_read(RTC_INTR_FLAGS); | ||
507 | mask &= (tmp & RTC_IRQMASK) | RTC_IRQF; | ||
508 | if (!is_intr(mask)) | ||
509 | break; | ||
510 | |||
511 | rtc_update_irq(mrst->rtc, 1, mask); | ||
512 | tmp &= ~RTC_AIE; | ||
513 | } while (mask & RTC_AIE); | ||
514 | spin_unlock_irq(&rtc_lock); | ||
515 | } | ||
516 | |||
517 | dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp); | ||
518 | |||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | #else | ||
523 | #define mrst_suspend NULL | ||
524 | #define mrst_resume NULL | ||
525 | |||
526 | static inline int mrst_poweroff(struct device *dev) | ||
527 | { | ||
528 | return -ENOSYS; | ||
529 | } | ||
530 | |||
531 | #endif | ||
532 | |||
533 | static int __init vrtc_mrst_platform_probe(struct platform_device *pdev) | ||
534 | { | ||
535 | return vrtc_mrst_do_probe(&pdev->dev, | ||
536 | platform_get_resource(pdev, IORESOURCE_MEM, 0), | ||
537 | platform_get_irq(pdev, 0)); | ||
538 | } | ||
539 | |||
540 | static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev) | ||
541 | { | ||
542 | rtc_mrst_do_remove(&pdev->dev); | ||
543 | return 0; | ||
544 | } | ||
545 | |||
546 | static void vrtc_mrst_platform_shutdown(struct platform_device *pdev) | ||
547 | { | ||
548 | if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev)) | ||
549 | return; | ||
550 | |||
551 | rtc_mrst_do_shutdown(); | ||
552 | } | ||
553 | |||
554 | MODULE_ALIAS("platform:vrtc_mrst"); | ||
555 | |||
556 | static struct platform_driver vrtc_mrst_platform_driver = { | ||
557 | .probe = vrtc_mrst_platform_probe, | ||
558 | .remove = __exit_p(vrtc_mrst_platform_remove), | ||
559 | .shutdown = vrtc_mrst_platform_shutdown, | ||
560 | .driver = { | ||
561 | .name = (char *) driver_name, | ||
562 | .suspend = mrst_suspend, | ||
563 | .resume = mrst_resume, | ||
564 | } | ||
565 | }; | ||
566 | |||
567 | static int __init vrtc_mrst_init(void) | ||
568 | { | ||
569 | return platform_driver_register(&vrtc_mrst_platform_driver); | ||
570 | } | ||
571 | |||
572 | static void __exit vrtc_mrst_exit(void) | ||
573 | { | ||
574 | platform_driver_unregister(&vrtc_mrst_platform_driver); | ||
575 | } | ||
576 | |||
577 | module_init(vrtc_mrst_init); | ||
578 | module_exit(vrtc_mrst_exit); | ||
579 | |||
580 | MODULE_AUTHOR("Jacob Pan; Feng Tang"); | ||
581 | MODULE_DESCRIPTION("Driver for Moorestown virtual RTC"); | ||
582 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5476c066d4ee..3c4039d5eef1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
763 | int metadata; | 763 | int metadata; |
764 | unsigned int revokes = 0; | 764 | unsigned int revokes = 0; |
765 | int x; | 765 | int x; |
766 | int error; | 766 | int error = 0; |
767 | 767 | ||
768 | if (!*top) | 768 | if (!*top) |
769 | sm->sm_first = 0; | 769 | sm->sm_first = 0; |
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
780 | if (metadata) | 780 | if (metadata) |
781 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 781 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
782 | 782 | ||
783 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); | 783 | if (ip != GFS2_I(sdp->sd_rindex)) |
784 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); | ||
785 | else if (!sdp->sd_rgrps) | ||
786 | error = gfs2_ri_update(ip); | ||
787 | |||
784 | if (error) | 788 | if (error) |
785 | return error; | 789 | return error; |
786 | 790 | ||
@@ -879,7 +883,8 @@ out_rg_gunlock: | |||
879 | out_rlist: | 883 | out_rlist: |
880 | gfs2_rlist_free(&rlist); | 884 | gfs2_rlist_free(&rlist); |
881 | out: | 885 | out: |
882 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); | 886 | if (ip != GFS2_I(sdp->sd_rindex)) |
887 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); | ||
883 | return error; | 888 | return error; |
884 | } | 889 | } |
885 | 890 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f92c17704169..08a8beb152e6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -541,21 +541,6 @@ out_locked: | |||
541 | spin_unlock(&gl->gl_spin); | 541 | spin_unlock(&gl->gl_spin); |
542 | } | 542 | } |
543 | 543 | ||
544 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
545 | unsigned int req_state, | ||
546 | unsigned int flags) | ||
547 | { | ||
548 | int ret = LM_OUT_ERROR; | ||
549 | |||
550 | if (!sdp->sd_lockstruct.ls_ops->lm_lock) | ||
551 | return req_state == LM_ST_UNLOCKED ? 0 : req_state; | ||
552 | |||
553 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
554 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, | ||
555 | req_state, flags); | ||
556 | return ret; | ||
557 | } | ||
558 | |||
559 | /** | 544 | /** |
560 | * do_xmote - Calls the DLM to change the state of a lock | 545 | * do_xmote - Calls the DLM to change the state of a lock |
561 | * @gl: The lock state | 546 | * @gl: The lock state |
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin) | |||
575 | 560 | ||
576 | lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | | 561 | lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | |
577 | LM_FLAG_PRIORITY); | 562 | LM_FLAG_PRIORITY); |
578 | BUG_ON(gl->gl_state == target); | 563 | GLOCK_BUG_ON(gl, gl->gl_state == target); |
579 | BUG_ON(gl->gl_state == gl->gl_target); | 564 | GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); |
580 | if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && | 565 | if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && |
581 | glops->go_inval) { | 566 | glops->go_inval) { |
582 | set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | 567 | set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); |
583 | do_error(gl, 0); /* Fail queued try locks */ | 568 | do_error(gl, 0); /* Fail queued try locks */ |
584 | } | 569 | } |
570 | gl->gl_req = target; | ||
585 | spin_unlock(&gl->gl_spin); | 571 | spin_unlock(&gl->gl_spin); |
586 | if (glops->go_xmote_th) | 572 | if (glops->go_xmote_th) |
587 | glops->go_xmote_th(gl); | 573 | glops->go_xmote_th(gl); |
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin) | |||
594 | gl->gl_state == LM_ST_DEFERRED) && | 580 | gl->gl_state == LM_ST_DEFERRED) && |
595 | !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | 581 | !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) |
596 | lck_flags |= LM_FLAG_TRY_1CB; | 582 | lck_flags |= LM_FLAG_TRY_1CB; |
597 | ret = gfs2_lm_lock(sdp, gl, target, lck_flags); | ||
598 | 583 | ||
599 | if (!(ret & LM_OUT_ASYNC)) { | 584 | if (sdp->sd_lockstruct.ls_ops->lm_lock) { |
600 | finish_xmote(gl, ret); | 585 | /* lock_dlm */ |
586 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); | ||
587 | GLOCK_BUG_ON(gl, ret); | ||
588 | } else { /* lock_nolock */ | ||
589 | finish_xmote(gl, target); | ||
601 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 590 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
602 | gfs2_glock_put(gl); | 591 | gfs2_glock_put(gl); |
603 | } else { | ||
604 | GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); | ||
605 | } | 592 | } |
593 | |||
606 | spin_lock(&gl->gl_spin); | 594 | spin_lock(&gl->gl_spin); |
607 | } | 595 | } |
608 | 596 | ||
@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh) | |||
951 | 939 | ||
952 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) | 940 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) |
953 | { | 941 | { |
942 | struct va_format vaf; | ||
954 | va_list args; | 943 | va_list args; |
955 | 944 | ||
956 | va_start(args, fmt); | 945 | va_start(args, fmt); |
946 | |||
957 | if (seq) { | 947 | if (seq) { |
958 | struct gfs2_glock_iter *gi = seq->private; | 948 | struct gfs2_glock_iter *gi = seq->private; |
959 | vsprintf(gi->string, fmt, args); | 949 | vsprintf(gi->string, fmt, args); |
960 | seq_printf(seq, gi->string); | 950 | seq_printf(seq, gi->string); |
961 | } else { | 951 | } else { |
962 | printk(KERN_ERR " "); | 952 | vaf.fmt = fmt; |
963 | vprintk(fmt, args); | 953 | vaf.va = &args; |
954 | |||
955 | printk(KERN_ERR " %pV", &vaf); | ||
964 | } | 956 | } |
957 | |||
965 | va_end(args); | 958 | va_end(args); |
966 | } | 959 | } |
967 | 960 | ||
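The gfs2_print_dbg() hunk above replaces the two-step printk-prefix-plus-vprintk with the kernel's %pV extension, which bundles the format and va_list into a struct va_format so the prefix and message go out in a single printk. A userspace analogue of the same idea (vsnprintf standing in for %pV, which is printk-only):

    #include <stdarg.h>
    #include <stdio.h>

    /* Keep the caller's fmt and va_list together so the "  " prefix
     * and the message are emitted in one call, not two.
     */
    static void dbg(const char *fmt, ...)
    {
        char buf[256];
        va_list args;

        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);

        fprintf(stderr, "  %s", buf);
    }

    int main(void)
    {
        dbg("glock state %d\n", 3);
        return 0;
    }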
@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) | |||
1361 | * @gl: Pointer to the glock | 1354 | * @gl: Pointer to the glock |
1362 | * @ret: The return value from the dlm | 1355 | * @ret: The return value from the dlm |
1363 | * | 1356 | * |
1357 | * The gl_reply field is under the gl_spin lock so that it is ok | ||
1358 | * to use a bitfield shared with other glock state fields. | ||
1364 | */ | 1359 | */ |
1365 | 1360 | ||
1366 | void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | 1361 | void gfs2_glock_complete(struct gfs2_glock *gl, int ret) |
1367 | { | 1362 | { |
1368 | struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; | 1363 | struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; |
1369 | 1364 | ||
1365 | spin_lock(&gl->gl_spin); | ||
1370 | gl->gl_reply = ret; | 1366 | gl->gl_reply = ret; |
1371 | 1367 | ||
1372 | if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { | 1368 | if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { |
1373 | spin_lock(&gl->gl_spin); | ||
1374 | if (gfs2_should_freeze(gl)) { | 1369 | if (gfs2_should_freeze(gl)) { |
1375 | set_bit(GLF_FROZEN, &gl->gl_flags); | 1370 | set_bit(GLF_FROZEN, &gl->gl_flags); |
1376 | spin_unlock(&gl->gl_spin); | 1371 | spin_unlock(&gl->gl_spin); |
1377 | return; | 1372 | return; |
1378 | } | 1373 | } |
1379 | spin_unlock(&gl->gl_spin); | ||
1380 | } | 1374 | } |
1375 | |||
1376 | spin_unlock(&gl->gl_spin); | ||
1381 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | 1377 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); |
1378 | smp_wmb(); | ||
1382 | gfs2_glock_hold(gl); | 1379 | gfs2_glock_hold(gl); |
1383 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 1380 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
1384 | gfs2_glock_put(gl); | 1381 | gfs2_glock_put(gl); |
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1626 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | 1623 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) |
1627 | { | 1624 | { |
1628 | struct task_struct *gh_owner = NULL; | 1625 | struct task_struct *gh_owner = NULL; |
1629 | char buffer[KSYM_SYMBOL_LEN]; | ||
1630 | char flags_buf[32]; | 1626 | char flags_buf[32]; |
1631 | 1627 | ||
1632 | sprint_symbol(buffer, gh->gh_ip); | ||
1633 | if (gh->gh_owner_pid) | 1628 | if (gh->gh_owner_pid) |
1634 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1629 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
1635 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", | 1630 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", |
1636 | state2str(gh->gh_state), | 1631 | state2str(gh->gh_state), |
1637 | hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), | 1632 | hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), |
1638 | gh->gh_error, | 1633 | gh->gh_error, |
1639 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, | 1634 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
1640 | gh_owner ? gh_owner->comm : "(ended)", buffer); | 1635 | gh_owner ? gh_owner->comm : "(ended)", |
1636 | (void *)gh->gh_ip); | ||
1641 | return 0; | 1637 | return 0; |
1642 | } | 1638 | } |
1643 | 1639 | ||
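
Annotation: the %pS change drops a KSYM_SYMBOL_LEN stack buffer and the sprint_symbol() call; printk resolves the code address to symbol+offset at format time instead. A one-function sketch of the same idiom:

    #include <linux/kernel.h>

    static void who_called_me(void)
    {
        /* %pS turns a text address into "symbol+0xoff/0xlen" */
        printk(KERN_INFO "caller: %pS\n", (void *)_RET_IP_);
    }
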
@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void) | |||
1782 | } | 1778 | } |
1783 | #endif | 1779 | #endif |
1784 | 1780 | ||
1785 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | | 1781 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | |
1786 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | 1782 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); |
1787 | if (IS_ERR(glock_workqueue)) | 1783 | if (IS_ERR(glock_workqueue)) |
1788 | return PTR_ERR(glock_workqueue); | 1784 | return PTR_ERR(glock_workqueue); |
1789 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | | 1785 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", |
1790 | WQ_FREEZEABLE, 0); | 1786 | WQ_MEM_RECLAIM | WQ_FREEZEABLE, |
1787 | 0); | ||
1791 | if (IS_ERR(gfs2_delete_workqueue)) { | 1788 | if (IS_ERR(gfs2_delete_workqueue)) { |
1792 | destroy_workqueue(glock_workqueue); | 1789 | destroy_workqueue(glock_workqueue); |
1793 | return PTR_ERR(gfs2_delete_workqueue); | 1790 | return PTR_ERR(gfs2_delete_workqueue); |
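
Annotation: WQ_MEM_RECLAIM is the public spelling of what WQ_RESCUER guaranteed internally: a rescuer thread, so work queued here keeps making progress even when worker creation stalls under memory pressure. A minimal sketch of the allocation (note that alloc_workqueue() conventionally reports failure with NULL, hence the NULL check here rather than the IS_ERR style above):

    #include <linux/init.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *wq;

    static int __init wq_example_init(void)
    {
        /* rescuer guaranteed: safe to depend on from reclaim paths */
        wq = alloc_workqueue("wq_example", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
        if (!wq)
            return -ENOMEM;
        return 0;
    }
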
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index db1c26d6d220..691851ceb615 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -87,11 +87,10 @@ enum { | |||
87 | #define GL_ASYNC 0x00000040 | 87 | #define GL_ASYNC 0x00000040 |
88 | #define GL_EXACT 0x00000080 | 88 | #define GL_EXACT 0x00000080 |
89 | #define GL_SKIP 0x00000100 | 89 | #define GL_SKIP 0x00000100 |
90 | #define GL_ATIME 0x00000200 | ||
91 | #define GL_NOCACHE 0x00000400 | 90 | #define GL_NOCACHE 0x00000400 |
92 | 91 | ||
93 | /* | 92 | /* |
94 | * lm_lock() and lm_async_cb return flags | 93 | * lm_async_cb return flags |
95 | * | 94 | * |
96 | * LM_OUT_ST_MASK | 95 | * LM_OUT_ST_MASK |
97 | * Masks the lower two bits of lock state in the returned value. | 96 | * Masks the lower two bits of lock state in the returned value. |
@@ -99,15 +98,11 @@ enum { | |||
99 | * LM_OUT_CANCELED | 98 | * LM_OUT_CANCELED |
100 | * The lock request was canceled. | 99 | * The lock request was canceled. |
101 | * | 100 | * |
102 | * LM_OUT_ASYNC | ||
103 | * The result of the request will be returned in an LM_CB_ASYNC callback. | ||
104 | * | ||
105 | */ | 101 | */ |
106 | 102 | ||
107 | #define LM_OUT_ST_MASK 0x00000003 | 103 | #define LM_OUT_ST_MASK 0x00000003 |
108 | #define LM_OUT_CANCELED 0x00000008 | 104 | #define LM_OUT_CANCELED 0x00000008 |
109 | #define LM_OUT_ASYNC 0x00000080 | 105 | #define LM_OUT_ERROR 0x00000004 |
110 | #define LM_OUT_ERROR 0x00000100 | ||
111 | 106 | ||
112 | /* | 107 | /* |
113 | * lm_recovery_done() messages | 108 | * lm_recovery_done() messages |
@@ -124,25 +119,12 @@ struct lm_lockops { | |||
124 | void (*lm_unmount) (struct gfs2_sbd *sdp); | 119 | void (*lm_unmount) (struct gfs2_sbd *sdp); |
125 | void (*lm_withdraw) (struct gfs2_sbd *sdp); | 120 | void (*lm_withdraw) (struct gfs2_sbd *sdp); |
126 | void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); | 121 | void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); |
127 | unsigned int (*lm_lock) (struct gfs2_glock *gl, | 122 | int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, |
128 | unsigned int req_state, unsigned int flags); | 123 | unsigned int flags); |
129 | void (*lm_cancel) (struct gfs2_glock *gl); | 124 | void (*lm_cancel) (struct gfs2_glock *gl); |
130 | const match_table_t *lm_tokens; | 125 | const match_table_t *lm_tokens; |
131 | }; | 126 | }; |
132 | 127 | ||
133 | #define LM_FLAG_TRY 0x00000001 | ||
134 | #define LM_FLAG_TRY_1CB 0x00000002 | ||
135 | #define LM_FLAG_NOEXP 0x00000004 | ||
136 | #define LM_FLAG_ANY 0x00000008 | ||
137 | #define LM_FLAG_PRIORITY 0x00000010 | ||
138 | |||
139 | #define GL_ASYNC 0x00000040 | ||
140 | #define GL_EXACT 0x00000080 | ||
141 | #define GL_SKIP 0x00000100 | ||
142 | #define GL_NOCACHE 0x00000400 | ||
143 | |||
144 | #define GLR_TRYFAILED 13 | ||
145 | |||
146 | extern struct workqueue_struct *gfs2_delete_workqueue; | 128 | extern struct workqueue_struct *gfs2_delete_workqueue; |
147 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 129 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
148 | { | 130 | { |
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | |||
212 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 194 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
213 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 195 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
214 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 196 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
197 | |||
198 | __attribute__ ((format(printf, 2, 3))) | ||
215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 199 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
216 | 200 | ||
217 | /** | 201 | /** |
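
Annotation: the new attribute tells the compiler that argument 2 of gfs2_print_dbg() is a printf-style format consumed by the varargs starting at argument 3, so -Wformat now type-checks every call site. The general shape, with a hypothetical log_dbg():

    struct seq_file;            /* forward declaration for the sketch */

    /* arg 2 = format string, varargs start at arg 3; a call such as
     * log_dbg(seq, "%s", 42) now draws a compile-time warning. */
    __attribute__((format(printf, 2, 3)))
    void log_dbg(struct seq_file *seq, const char *fmt, ...);
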
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 0d149dcc04e5..263561bf1a50 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl) | |||
325 | 325 | ||
326 | if (gl->gl_state != LM_ST_UNLOCKED && | 326 | if (gl->gl_state != LM_ST_UNLOCKED && |
327 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | 327 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
328 | flush_workqueue(gfs2_delete_workqueue); | ||
329 | gfs2_meta_syncfs(sdp); | 328 | gfs2_meta_syncfs(sdp); |
330 | gfs2_log_shutdown(sdp); | 329 | gfs2_log_shutdown(sdp); |
331 | } | 330 | } |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 764fbb49efc8..8d3d2b4a0a7d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -207,12 +207,14 @@ struct gfs2_glock { | |||
207 | 207 | ||
208 | spinlock_t gl_spin; | 208 | spinlock_t gl_spin; |
209 | 209 | ||
210 | unsigned int gl_state; | 210 | /* State fields protected by gl_spin */ |
211 | unsigned int gl_target; | 211 | unsigned int gl_state:2, /* Current state */ |
212 | unsigned int gl_reply; | 212 | gl_target:2, /* Target state */ |
213 | gl_demote_state:2, /* State requested by remote node */ | ||
214 | gl_req:2, /* State in last dlm request */ | ||
215 | gl_reply:8; /* Last reply from the dlm */ | ||
216 | |||
213 | unsigned int gl_hash; | 217 | unsigned int gl_hash; |
214 | unsigned int gl_req; | ||
215 | unsigned int gl_demote_state; /* state requested by remote node */ | ||
216 | unsigned long gl_demote_time; /* time of first demote request */ | 218 | unsigned long gl_demote_time; /* time of first demote request */ |
217 | struct list_head gl_holders; | 219 | struct list_head gl_holders; |
218 | 220 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e1213f7f9217..14e682dbe8bf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | |||
916 | if (error) | 916 | if (error) |
917 | return error; | 917 | return error; |
918 | 918 | ||
919 | if ((attr->ia_valid & ATTR_SIZE) && | ||
920 | attr->ia_size != i_size_read(inode)) { | ||
921 | error = vmtruncate(inode, attr->ia_size); | ||
922 | if (error) | ||
923 | return error; | ||
924 | } | ||
925 | |||
926 | setattr_copy(inode, attr); | 919 | setattr_copy(inode, attr); |
927 | mark_inode_dirty(inode); | 920 | mark_inode_dirty(inode); |
928 | |||
929 | gfs2_assert_warn(GFS2_SB(inode), !error); | ||
930 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 921 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
931 | gfs2_dinode_out(ip, dibh->b_data); | 922 | gfs2_dinode_out(ip, dibh->b_data); |
932 | brelse(dibh); | 923 | brelse(dibh); |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 1c09425b45fd..6e493aee28f8 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, | |||
146 | return lkf; | 146 | return lkf; |
147 | } | 147 | } |
148 | 148 | ||
149 | static unsigned int gdlm_lock(struct gfs2_glock *gl, | 149 | static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, |
150 | unsigned int req_state, unsigned int flags) | 150 | unsigned int flags) |
151 | { | 151 | { |
152 | struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; | 152 | struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; |
153 | int error; | ||
154 | int req; | 153 | int req; |
155 | u32 lkf; | 154 | u32 lkf; |
156 | 155 | ||
157 | gl->gl_req = req_state; | ||
158 | req = make_mode(req_state); | 156 | req = make_mode(req_state); |
159 | lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); | 157 | lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); |
160 | 158 | ||
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, | |||
162 | * Submit the actual lock request. | 160 | * Submit the actual lock request. |
163 | */ | 161 | */ |
164 | 162 | ||
165 | error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, | 163 | return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, |
166 | GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); | 164 | GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); |
167 | if (error == -EAGAIN) | ||
168 | return 0; | ||
169 | if (error) | ||
170 | return LM_OUT_ERROR; | ||
171 | return LM_OUT_ASYNC; | ||
172 | } | 165 | } |
173 | 166 | ||
174 | static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) | 167 | static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..1db6b7343229 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -1069,7 +1069,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1069 | { | 1069 | { |
1070 | struct gfs2_inode *ip = GFS2_I(inode); | 1070 | struct gfs2_inode *ip = GFS2_I(inode); |
1071 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 1071 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
1072 | struct buffer_head *dibh; | ||
1073 | u32 ouid, ogid, nuid, ngid; | 1072 | u32 ouid, ogid, nuid, ngid; |
1074 | int error; | 1073 | int error; |
1075 | 1074 | ||
@@ -1100,25 +1099,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1100 | if (error) | 1099 | if (error) |
1101 | goto out_gunlock_q; | 1100 | goto out_gunlock_q; |
1102 | 1101 | ||
1103 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1102 | error = gfs2_setattr_simple(ip, attr); |
1104 | if (error) | 1103 | if (error) |
1105 | goto out_end_trans; | 1104 | goto out_end_trans; |
1106 | 1105 | ||
1107 | if ((attr->ia_valid & ATTR_SIZE) && | ||
1108 | attr->ia_size != i_size_read(inode)) { | ||
1109 | int error; | ||
1110 | |||
1111 | error = vmtruncate(inode, attr->ia_size); | ||
1112 | gfs2_assert_warn(sdp, !error); | ||
1113 | } | ||
1114 | |||
1115 | setattr_copy(inode, attr); | ||
1116 | mark_inode_dirty(inode); | ||
1117 | |||
1118 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1119 | gfs2_dinode_out(ip, dibh->b_data); | ||
1120 | brelse(dibh); | ||
1121 | |||
1122 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { | 1106 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { |
1123 | u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); | 1107 | u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); |
1124 | gfs2_quota_change(ip, -blocks, ouid, ogid); | 1108 | gfs2_quota_change(ip, -blocks, ouid, ogid); |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f606baf9ba72..a689901963de 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
666 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); | 666 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); |
667 | qd->qd_qb.qb_limit = qp->qu_limit; | 667 | qd->qd_qb.qb_limit = qp->qu_limit; |
668 | } | 668 | } |
669 | if (fdq->d_fieldmask & FS_DQ_BCOUNT) { | ||
670 | qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); | ||
671 | qd->qd_qb.qb_value = qp->qu_value; | ||
672 | } | ||
669 | } | 673 | } |
670 | 674 | ||
671 | /* Write the quota into the quota file on disk */ | 675 | /* Write the quota into the quota file on disk */ |
@@ -1509,7 +1513,7 @@ out: | |||
1509 | } | 1513 | } |
1510 | 1514 | ||
1511 | /* GFS2 only supports a subset of the XFS fields */ | 1515 | /* GFS2 only supports a subset of the XFS fields */ |
1512 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) | 1516 | #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) |
1513 | 1517 | ||
1514 | static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | 1518 | static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, |
1515 | struct fs_disk_quota *fdq) | 1519 | struct fs_disk_quota *fdq) |
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1569 | if ((fdq->d_fieldmask & FS_DQ_BSOFT) && | 1573 | if ((fdq->d_fieldmask & FS_DQ_BSOFT) && |
1570 | ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) | 1574 | ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) |
1571 | fdq->d_fieldmask ^= FS_DQ_BSOFT; | 1575 | fdq->d_fieldmask ^= FS_DQ_BSOFT; |
1576 | |||
1572 | if ((fdq->d_fieldmask & FS_DQ_BHARD) && | 1577 | if ((fdq->d_fieldmask & FS_DQ_BHARD) && |
1573 | ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) | 1578 | ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) |
1574 | fdq->d_fieldmask ^= FS_DQ_BHARD; | 1579 | fdq->d_fieldmask ^= FS_DQ_BHARD; |
1580 | |||
1581 | if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && | ||
1582 | ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) | ||
1583 | fdq->d_fieldmask ^= FS_DQ_BCOUNT; | ||
1584 | |||
1575 | if (fdq->d_fieldmask == 0) | 1585 | if (fdq->d_fieldmask == 0) |
1576 | goto out_i; | 1586 | goto out_i; |
1577 | 1587 | ||
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = { | |||
1620 | .get_dqblk = gfs2_get_dqblk, | 1630 | .get_dqblk = gfs2_get_dqblk, |
1621 | .set_dqblk = gfs2_set_dqblk, | 1631 | .set_dqblk = gfs2_set_dqblk, |
1622 | }; | 1632 | }; |
1623 | |||
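
Annotation: gfs2_set_dqblk now treats FS_DQ_BCOUNT like the two limits: any requested field whose value already matches what is stored has its mask bit cleared, and an empty mask short-circuits before the transaction is opened. The filtering idea in isolation (FIELD_* and the parameters are stand-ins, not the real FS_DQ_* interface):

    #define FIELD_SOFT 0x1
    #define FIELD_HARD 0x2

    /* returns non-zero when the whole update turned out to be a no-op */
    static int filter_noop(unsigned int *mask,
                           unsigned long long new_soft,
                           unsigned long long cur_soft)
    {
        if ((*mask & FIELD_SOFT) && new_soft == cur_soft)
            *mask ^= FIELD_SOFT;    /* value unchanged: drop the bit */
        return *mask == 0;          /* nothing left to write */
    }
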
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 33c8407b876f..7293ea27020c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
500 | for (rgrps = 0;; rgrps++) { | 500 | for (rgrps = 0;; rgrps++) { |
501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); | 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); |
502 | 502 | ||
503 | if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) | 503 | if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) |
504 | break; | 504 | break; |
505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, | 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, |
506 | sizeof(struct gfs2_rindex)); | 506 | sizeof(struct gfs2_rindex)); |
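
Annotation: the '>=' to '>' change is an off-by-one fix: a rindex record whose last byte lands exactly on the end of the file is complete and must be read, so only a record extending past the file size should stop the loop. The loop shape, with read_record() as a hypothetical reader:

    #include <linux/types.h>

    void read_record(loff_t pos);       /* hypothetical */

    static void read_all_records(loff_t size, loff_t recsz)
    {
        loff_t pos;

        /* complete iff pos + recsz <= size; breaking on '>='
         * discards the final record of an exact-multiple file */
        for (pos = 0; pos + recsz <= size; pos += recsz)
            read_record(pos);
    }
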
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
583 | * Returns: 0 on successful update, error code otherwise | 583 | * Returns: 0 on successful update, error code otherwise |
584 | */ | 584 | */ |
585 | 585 | ||
586 | static int gfs2_ri_update(struct gfs2_inode *ip) | 586 | int gfs2_ri_update(struct gfs2_inode *ip) |
587 | { | 587 | { |
588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
589 | struct inode *inode = &ip->i_inode; | 589 | struct inode *inode = &ip->i_inode; |
@@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
614 | } | 614 | } |
615 | 615 | ||
616 | /** | 616 | /** |
617 | * gfs2_ri_update_special - Pull in a new resource index from the disk | ||
618 | * | ||
619 | * This is a special version that's safe to call from gfs2_inplace_reserve_i. | ||
620 | * In this case we know that we don't have any resource groups in memory yet. | ||
621 | * | ||
622 | * @ip: pointer to the rindex inode | ||
623 | * | ||
624 | * Returns: 0 on successful update, error code otherwise | ||
625 | */ | ||
626 | static int gfs2_ri_update_special(struct gfs2_inode *ip) | ||
627 | { | ||
628 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
629 | struct inode *inode = &ip->i_inode; | ||
630 | struct file_ra_state ra_state; | ||
631 | struct gfs2_rgrpd *rgd; | ||
632 | unsigned int max_data = 0; | ||
633 | int error; | ||
634 | |||
635 | file_ra_state_init(&ra_state, inode->i_mapping); | ||
636 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { | ||
637 | /* Ignore partials */ | ||
638 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > | ||
639 | i_size_read(inode)) | ||
640 | break; | ||
641 | error = read_rindex_entry(ip, &ra_state); | ||
642 | if (error) { | ||
643 | clear_rgrpdi(sdp); | ||
644 | return error; | ||
645 | } | ||
646 | } | ||
647 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
648 | if (rgd->rd_data > max_data) | ||
649 | max_data = rgd->rd_data; | ||
650 | sdp->sd_max_rg_data = max_data; | ||
651 | |||
652 | sdp->sd_rindex_uptodate = 1; | ||
653 | return 0; | ||
654 | } | ||
655 | |||
656 | /** | ||
657 | * gfs2_rindex_hold - Grab a lock on the rindex | 617 | * gfs2_rindex_hold - Grab a lock on the rindex |
658 | * @sdp: The GFS2 superblock | 618 | * @sdp: The GFS2 superblock |
659 | * @ri_gh: the glock holder | 619 | * @ri_gh: the glock holder |
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | |||
1226 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | 1186 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); |
1227 | else if (!sdp->sd_rgrps) /* We may not have the rindex read | 1187 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
1228 | in, so: */ | 1188 | in, so: */ |
1229 | error = gfs2_ri_update_special(ip); | 1189 | error = gfs2_ri_update(ip); |
1230 | if (error) | 1190 | if (error) |
1231 | return error; | 1191 | return error; |
1232 | } | 1192 | } |
1233 | 1193 | ||
1194 | try_again: | ||
1234 | do { | 1195 | do { |
1235 | error = get_local_rgrp(ip, &last_unlinked); | 1196 | error = get_local_rgrp(ip, &last_unlinked); |
1236 | /* If there is no space, flushing the log may release some */ | 1197 | /* If there is no space, flushing the log may release some */ |
1237 | if (error) | 1198 | if (error) { |
1199 | if (ip == GFS2_I(sdp->sd_rindex) && | ||
1200 | !sdp->sd_rindex_uptodate) { | ||
1201 | error = gfs2_ri_update(ip); | ||
1202 | if (error) | ||
1203 | return error; | ||
1204 | goto try_again; | ||
1205 | } | ||
1238 | gfs2_log_flush(sdp, NULL); | 1206 | gfs2_log_flush(sdp, NULL); |
1207 | } | ||
1239 | } while (error && tries++ < 3); | 1208 | } while (error && tries++ < 3); |
1240 | 1209 | ||
1241 | if (error) { | 1210 | if (error) { |
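
Annotation: with gfs2_ri_update_special() gone, a reservation failure against the rindex inode itself is now handled by refreshing the cached index once and retrying. A sketch of that shape; try_reserve(), cache_stale(), refresh_cache() and flush_log() are invented stand-ins for get_local_rgrp(), the sd_rindex_uptodate test, gfs2_ri_update() and gfs2_log_flush(). The retry is bounded because a successful refresh makes cache_stale() false on the next pass:

    int try_reserve(void);      /* hypothetical helpers */
    int cache_stale(void);
    int refresh_cache(void);
    void flush_log(void);

    static int reserve_with_refresh(void)
    {
        int error;
    again:
        error = try_reserve();
        if (error) {
            if (cache_stale()) {
                error = refresh_cache();
                if (error)
                    return error;
                goto again;
            }
            flush_log();        /* may free space for a later try */
        }
        return error;
    }
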
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 0e35c0466f9a..50c2bb04369c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | |||
48 | 48 | ||
49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
50 | 50 | ||
51 | extern int gfs2_ri_update(struct gfs2_inode *ip); | ||
51 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); | 52 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); |
52 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); | 53 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); |
53 | 54 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 30b58f07c8a6..439b61c03262 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -1296,10 +1296,8 @@ fail: | |||
1296 | 1296 | ||
1297 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | 1297 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) |
1298 | { | 1298 | { |
1299 | struct inode *inode = &ip->i_inode; | ||
1300 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1299 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1301 | struct gfs2_ea_location el; | 1300 | struct gfs2_ea_location el; |
1302 | struct buffer_head *dibh; | ||
1303 | int error; | 1301 | int error; |
1304 | 1302 | ||
1305 | error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); | 1303 | error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); |
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | |||
1321 | if (error) | 1319 | if (error) |
1322 | return error; | 1320 | return error; |
1323 | 1321 | ||
1324 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1322 | error = gfs2_setattr_simple(ip, attr); |
1325 | if (error) | ||
1326 | goto out_trans_end; | ||
1327 | |||
1328 | if ((attr->ia_valid & ATTR_SIZE) && | ||
1329 | attr->ia_size != i_size_read(inode)) { | ||
1330 | int error; | ||
1331 | |||
1332 | error = vmtruncate(inode, attr->ia_size); | ||
1333 | gfs2_assert_warn(GFS2_SB(inode), !error); | ||
1334 | } | ||
1335 | |||
1336 | setattr_copy(inode, attr); | ||
1337 | mark_inode_dirty(inode); | ||
1338 | |||
1339 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1340 | gfs2_dinode_out(ip, dibh->b_data); | ||
1341 | brelse(dibh); | ||
1342 | |||
1343 | out_trans_end: | ||
1344 | gfs2_trans_end(sdp); | 1323 | gfs2_trans_end(sdp); |
1345 | return error; | 1324 | return error; |
1346 | } | 1325 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 182845147fe4..08cba2c3b612 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = { | |||
1407 | 1407 | ||
1408 | #endif | 1408 | #endif |
1409 | 1409 | ||
1410 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
1411 | /* | ||
1412 | * Print out autogroup related information: | ||
1413 | */ | ||
1414 | static int sched_autogroup_show(struct seq_file *m, void *v) | ||
1415 | { | ||
1416 | struct inode *inode = m->private; | ||
1417 | struct task_struct *p; | ||
1418 | |||
1419 | p = get_proc_task(inode); | ||
1420 | if (!p) | ||
1421 | return -ESRCH; | ||
1422 | proc_sched_autogroup_show_task(p, m); | ||
1423 | |||
1424 | put_task_struct(p); | ||
1425 | |||
1426 | return 0; | ||
1427 | } | ||
1428 | |||
1429 | static ssize_t | ||
1430 | sched_autogroup_write(struct file *file, const char __user *buf, | ||
1431 | size_t count, loff_t *offset) | ||
1432 | { | ||
1433 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1434 | struct task_struct *p; | ||
1435 | char buffer[PROC_NUMBUF]; | ||
1436 | long nice; | ||
1437 | int err; | ||
1438 | |||
1439 | memset(buffer, 0, sizeof(buffer)); | ||
1440 | if (count > sizeof(buffer) - 1) | ||
1441 | count = sizeof(buffer) - 1; | ||
1442 | if (copy_from_user(buffer, buf, count)) | ||
1443 | return -EFAULT; | ||
1444 | |||
1445 | err = strict_strtol(strstrip(buffer), 0, &nice); | ||
1446 | if (err) | ||
1447 | return -EINVAL; | ||
1448 | |||
1449 | p = get_proc_task(inode); | ||
1450 | if (!p) | ||
1451 | return -ESRCH; | ||
1452 | |||
1453 | err = nice; | ||
1454 | err = proc_sched_autogroup_set_nice(p, &err); | ||
1455 | if (err) | ||
1456 | count = err; | ||
1457 | |||
1458 | put_task_struct(p); | ||
1459 | |||
1460 | return count; | ||
1461 | } | ||
1462 | |||
1463 | static int sched_autogroup_open(struct inode *inode, struct file *filp) | ||
1464 | { | ||
1465 | int ret; | ||
1466 | |||
1467 | ret = single_open(filp, sched_autogroup_show, NULL); | ||
1468 | if (!ret) { | ||
1469 | struct seq_file *m = filp->private_data; | ||
1470 | |||
1471 | m->private = inode; | ||
1472 | } | ||
1473 | return ret; | ||
1474 | } | ||
1475 | |||
1476 | static const struct file_operations proc_pid_sched_autogroup_operations = { | ||
1477 | .open = sched_autogroup_open, | ||
1478 | .read = seq_read, | ||
1479 | .write = sched_autogroup_write, | ||
1480 | .llseek = seq_lseek, | ||
1481 | .release = single_release, | ||
1482 | }; | ||
1483 | |||
1484 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
1485 | |||
1410 | static ssize_t comm_write(struct file *file, const char __user *buf, | 1486 | static ssize_t comm_write(struct file *file, const char __user *buf, |
1411 | size_t count, loff_t *offset) | 1487 | size_t count, loff_t *offset) |
1412 | { | 1488 | { |
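
Annotation: sched_autogroup_write() follows the standard small-procfs-write recipe: bound the copy to the local buffer, copy_from_user(), NUL-terminate, then strstrip() + strict_strtol() (the integer-parsing interface of this era, later superseded by kstrtol()) so surrounding whitespace is tolerated but malformed input is rejected; the open routine stashes the inode in m->private so the show callback can recover the task. A condensed sketch of the write side (names are invented):

    #include <linux/fs.h>
    #include <linux/kernel.h>
    #include <linux/string.h>
    #include <linux/uaccess.h>

    static ssize_t nice_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
    {
        char kbuf[16];
        long nice;

        if (count > sizeof(kbuf) - 1)
            count = sizeof(kbuf) - 1;   /* bound the copy */
        if (copy_from_user(kbuf, buf, count))
            return -EFAULT;
        kbuf[count] = '\0';
        if (strict_strtol(strstrip(kbuf), 0, &nice))
            return -EINVAL;
        /* ... validate and apply 'nice' ... */
        return count;
    }
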
@@ -2733,6 +2809,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2733 | #ifdef CONFIG_SCHED_DEBUG | 2809 | #ifdef CONFIG_SCHED_DEBUG |
2734 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2810 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2735 | #endif | 2811 | #endif |
2812 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2813 | REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), | ||
2814 | #endif | ||
2736 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 2815 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
2737 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 2816 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
2738 | INF("syscall", S_IRUSR, proc_pid_syscall), | 2817 | INF("syscall", S_IRUSR, proc_pid_syscall), |
diff --git a/include/linux/completion.h b/include/linux/completion.h index 36d57f74cd01..51494e6b5548 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x); | |||
81 | extern int wait_for_completion_killable(struct completion *x); | 81 | extern int wait_for_completion_killable(struct completion *x); |
82 | extern unsigned long wait_for_completion_timeout(struct completion *x, | 82 | extern unsigned long wait_for_completion_timeout(struct completion *x, |
83 | unsigned long timeout); | 83 | unsigned long timeout); |
84 | extern unsigned long wait_for_completion_interruptible_timeout( | 84 | extern long wait_for_completion_interruptible_timeout( |
85 | struct completion *x, unsigned long timeout); | 85 | struct completion *x, unsigned long timeout); |
86 | extern unsigned long wait_for_completion_killable_timeout( | 86 | extern long wait_for_completion_killable_timeout( |
87 | struct completion *x, unsigned long timeout); | 87 | struct completion *x, unsigned long timeout); |
88 | extern bool try_wait_for_completion(struct completion *x); | 88 | extern bool try_wait_for_completion(struct completion *x); |
89 | extern bool completion_done(struct completion *x); | 89 | extern bool completion_done(struct completion *x); |
90 | 90 | ||
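
Annotation: the switch from unsigned long to long is a correctness fix, not a cleanup: the interruptible and killable variants can return -ERESTARTSYS, and under an unsigned prototype that error is indistinguishable from a huge remaining-jiffies count. The intended three-way decode:

    #include <linux/completion.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>

    static int wait_one_second(struct completion *done)
    {
        long ret = wait_for_completion_interruptible_timeout(done, HZ);

        if (ret < 0)            /* -ERESTARTSYS: interrupted by signal */
            return ret;
        if (ret == 0)           /* timed out */
            return -ETIMEDOUT;
        return 0;               /* done; 'ret' jiffies were left over */
    }
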
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index a90b3892074a..1c70028f81f9 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h | |||
@@ -44,34 +44,24 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, | |||
44 | extern int ddebug_remove_module(const char *mod_name); | 44 | extern int ddebug_remove_module(const char *mod_name); |
45 | 45 | ||
46 | #define dynamic_pr_debug(fmt, ...) do { \ | 46 | #define dynamic_pr_debug(fmt, ...) do { \ |
47 | __label__ do_printk; \ | ||
48 | __label__ out; \ | ||
49 | static struct _ddebug descriptor \ | 47 | static struct _ddebug descriptor \ |
50 | __used \ | 48 | __used \ |
51 | __attribute__((section("__verbose"), aligned(8))) = \ | 49 | __attribute__((section("__verbose"), aligned(8))) = \ |
52 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ | 50 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ |
53 | _DPRINTK_FLAGS_DEFAULT }; \ | 51 | _DPRINTK_FLAGS_DEFAULT }; \ |
54 | JUMP_LABEL(&descriptor.enabled, do_printk); \ | 52 | if (unlikely(descriptor.enabled)) \ |
55 | goto out; \ | 53 | printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ |
56 | do_printk: \ | ||
57 | printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ | ||
58 | out: ; \ | ||
59 | } while (0) | 54 | } while (0) |
60 | 55 | ||
61 | 56 | ||
62 | #define dynamic_dev_dbg(dev, fmt, ...) do { \ | 57 | #define dynamic_dev_dbg(dev, fmt, ...) do { \ |
63 | __label__ do_printk; \ | ||
64 | __label__ out; \ | ||
65 | static struct _ddebug descriptor \ | 58 | static struct _ddebug descriptor \ |
66 | __used \ | 59 | __used \ |
67 | __attribute__((section("__verbose"), aligned(8))) = \ | 60 | __attribute__((section("__verbose"), aligned(8))) = \ |
68 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ | 61 | { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ |
69 | _DPRINTK_FLAGS_DEFAULT }; \ | 62 | _DPRINTK_FLAGS_DEFAULT }; \ |
70 | JUMP_LABEL(&descriptor.enabled, do_printk); \ | 63 | if (unlikely(descriptor.enabled)) \ |
71 | goto out; \ | 64 | dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ |
72 | do_printk: \ | ||
73 | dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ | ||
74 | out: ; \ | ||
75 | } while (0) | 65 | } while (0) |
76 | 66 | ||
77 | #else | 67 | #else |
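
Annotation: this reverts the JUMP_LABEL-based code patching to a plain flag test: each callsite keeps a static descriptor in the __verbose section, and the print is gated by an unlikely() load of descriptor.enabled. The bare shape of the guard (the one-field struct stands in for struct _ddebug):

    #include <linux/kernel.h>

    #define my_pr_debug(fmt, ...) do {                          \
        static struct { int enabled; } descriptor;              \
        if (unlikely(descriptor.enabled))                       \
            printk(KERN_DEBUG fmt, ##__VA_ARGS__);              \
    } while (0)
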
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0c1b857d3d..330586ffffbb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/wait.h> | 22 | #include <linux/wait.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
25 | 25 | #include <linux/timerqueue.h> | |
26 | 26 | ||
27 | struct hrtimer_clock_base; | 27 | struct hrtimer_clock_base; |
28 | struct hrtimer_cpu_base; | 28 | struct hrtimer_cpu_base; |
@@ -79,8 +79,8 @@ enum hrtimer_restart { | |||
79 | 79 | ||
80 | /** | 80 | /** |
81 | * struct hrtimer - the basic hrtimer structure | 81 | * struct hrtimer - the basic hrtimer structure |
82 | * @node: red black tree node for time ordered insertion | 82 | * @node: timerqueue node, which also manages node.expires, |
83 | * @_expires: the absolute expiry time in the hrtimers internal | 83 | * the absolute expiry time in the hrtimers internal |
84 | * representation. The time is related to the clock on | 84 | * representation. The time is related to the clock on |
85 | * which the timer is based. Is setup by adding | 85 | * which the timer is based. Is setup by adding |
86 | * slack to the _softexpires value. For non range timers | 86 | * slack to the _softexpires value. For non range timers |
@@ -101,8 +101,7 @@ enum hrtimer_restart { | |||
101 | * The hrtimer structure must be initialized by hrtimer_init() | 101 | * The hrtimer structure must be initialized by hrtimer_init() |
102 | */ | 102 | */ |
103 | struct hrtimer { | 103 | struct hrtimer { |
104 | struct rb_node node; | 104 | struct timerqueue_node node; |
105 | ktime_t _expires; | ||
106 | ktime_t _softexpires; | 105 | ktime_t _softexpires; |
107 | enum hrtimer_restart (*function)(struct hrtimer *); | 106 | enum hrtimer_restart (*function)(struct hrtimer *); |
108 | struct hrtimer_clock_base *base; | 107 | struct hrtimer_clock_base *base; |
@@ -141,8 +140,7 @@ struct hrtimer_sleeper { | |||
141 | struct hrtimer_clock_base { | 140 | struct hrtimer_clock_base { |
142 | struct hrtimer_cpu_base *cpu_base; | 141 | struct hrtimer_cpu_base *cpu_base; |
143 | clockid_t index; | 142 | clockid_t index; |
144 | struct rb_root active; | 143 | struct timerqueue_head active; |
145 | struct rb_node *first; | ||
146 | ktime_t resolution; | 144 | ktime_t resolution; |
147 | ktime_t (*get_time)(void); | 145 | ktime_t (*get_time)(void); |
148 | ktime_t softirq_time; | 146 | ktime_t softirq_time; |
@@ -158,7 +156,6 @@ struct hrtimer_clock_base { | |||
158 | * @lock: lock protecting the base and associated clock bases | 156 | * @lock: lock protecting the base and associated clock bases |
159 | * and timers | 157 | * and timers |
160 | * @clock_base: array of clock bases for this cpu | 158 | * @clock_base: array of clock bases for this cpu |
161 | * @curr_timer: the timer which is executing a callback right now | ||
162 | * @expires_next: absolute time of the next event which was scheduled | 159 | * @expires_next: absolute time of the next event which was scheduled |
163 | * via clock_set_next_event() | 160 | * via clock_set_next_event() |
164 | * @hres_active: State of high resolution mode | 161 | * @hres_active: State of high resolution mode |
@@ -184,43 +181,43 @@ struct hrtimer_cpu_base { | |||
184 | 181 | ||
185 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) | 182 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) |
186 | { | 183 | { |
187 | timer->_expires = time; | 184 | timer->node.expires = time; |
188 | timer->_softexpires = time; | 185 | timer->_softexpires = time; |
189 | } | 186 | } |
190 | 187 | ||
191 | static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) | 188 | static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) |
192 | { | 189 | { |
193 | timer->_softexpires = time; | 190 | timer->_softexpires = time; |
194 | timer->_expires = ktime_add_safe(time, delta); | 191 | timer->node.expires = ktime_add_safe(time, delta); |
195 | } | 192 | } |
196 | 193 | ||
197 | static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) | 194 | static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) |
198 | { | 195 | { |
199 | timer->_softexpires = time; | 196 | timer->_softexpires = time; |
200 | timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); | 197 | timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); |
201 | } | 198 | } |
202 | 199 | ||
203 | static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) | 200 | static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) |
204 | { | 201 | { |
205 | timer->_expires.tv64 = tv64; | 202 | timer->node.expires.tv64 = tv64; |
206 | timer->_softexpires.tv64 = tv64; | 203 | timer->_softexpires.tv64 = tv64; |
207 | } | 204 | } |
208 | 205 | ||
209 | static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) | 206 | static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) |
210 | { | 207 | { |
211 | timer->_expires = ktime_add_safe(timer->_expires, time); | 208 | timer->node.expires = ktime_add_safe(timer->node.expires, time); |
212 | timer->_softexpires = ktime_add_safe(timer->_softexpires, time); | 209 | timer->_softexpires = ktime_add_safe(timer->_softexpires, time); |
213 | } | 210 | } |
214 | 211 | ||
215 | static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) | 212 | static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) |
216 | { | 213 | { |
217 | timer->_expires = ktime_add_ns(timer->_expires, ns); | 214 | timer->node.expires = ktime_add_ns(timer->node.expires, ns); |
218 | timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); | 215 | timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); |
219 | } | 216 | } |
220 | 217 | ||
221 | static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) | 218 | static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) |
222 | { | 219 | { |
223 | return timer->_expires; | 220 | return timer->node.expires; |
224 | } | 221 | } |
225 | 222 | ||
226 | static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) | 223 | static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) |
@@ -230,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) | |||
230 | 227 | ||
231 | static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) | 228 | static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) |
232 | { | 229 | { |
233 | return timer->_expires.tv64; | 230 | return timer->node.expires.tv64; |
234 | } | 231 | } |
235 | static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) | 232 | static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) |
236 | { | 233 | { |
@@ -239,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) | |||
239 | 236 | ||
240 | static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) | 237 | static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) |
241 | { | 238 | { |
242 | return ktime_to_ns(timer->_expires); | 239 | return ktime_to_ns(timer->node.expires); |
243 | } | 240 | } |
244 | 241 | ||
245 | static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) | 242 | static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) |
246 | { | 243 | { |
247 | return ktime_sub(timer->_expires, timer->base->get_time()); | 244 | return ktime_sub(timer->node.expires, timer->base->get_time()); |
248 | } | 245 | } |
249 | 246 | ||
250 | #ifdef CONFIG_HIGH_RES_TIMERS | 247 | #ifdef CONFIG_HIGH_RES_TIMERS |
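
Annotation: the open-coded rb_root plus cached-first pointer and the private _expires field give way to the timerqueue library, which keeps an rbtree sorted on node.expires and caches the earliest element for O(1) "next deadline" lookup. A usage sketch, assuming the timerqueue API introduced alongside this series:

    #include <linux/ktime.h>
    #include <linux/timerqueue.h>

    static void timerqueue_demo(void)
    {
        struct timerqueue_head head;
        struct timerqueue_node node, *next;

        timerqueue_init_head(&head);
        timerqueue_init(&node);
        node.expires = ktime_set(1, 0);     /* fires in one second */
        timerqueue_add(&head, &node);

        /* earliest deadline without walking the tree */
        next = timerqueue_getnext(&head);
        (void)next;
    }
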
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f8c06ce0fa6..caa151fbebb7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -12,6 +12,13 @@ | |||
12 | #include <linux/securebits.h> | 12 | #include <linux/securebits.h> |
13 | #include <net/net_namespace.h> | 13 | #include <net/net_namespace.h> |
14 | 14 | ||
15 | #ifdef CONFIG_SMP | ||
16 | # define INIT_PUSHABLE_TASKS(tsk) \ | ||
17 | .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), | ||
18 | #else | ||
19 | # define INIT_PUSHABLE_TASKS(tsk) | ||
20 | #endif | ||
21 | |||
15 | extern struct files_struct init_files; | 22 | extern struct files_struct init_files; |
16 | extern struct fs_struct init_fs; | 23 | extern struct fs_struct init_fs; |
17 | 24 | ||
@@ -83,6 +90,12 @@ extern struct group_info init_groups; | |||
83 | */ | 90 | */ |
84 | # define CAP_INIT_BSET CAP_FULL_SET | 91 | # define CAP_INIT_BSET CAP_FULL_SET |
85 | 92 | ||
93 | #ifdef CONFIG_RCU_BOOST | ||
94 | #define INIT_TASK_RCU_BOOST() \ | ||
95 | .rcu_boost_mutex = NULL, | ||
96 | #else | ||
97 | #define INIT_TASK_RCU_BOOST() | ||
98 | #endif | ||
86 | #ifdef CONFIG_TREE_PREEMPT_RCU | 99 | #ifdef CONFIG_TREE_PREEMPT_RCU |
87 | #define INIT_TASK_RCU_TREE_PREEMPT() \ | 100 | #define INIT_TASK_RCU_TREE_PREEMPT() \ |
88 | .rcu_blocked_node = NULL, | 101 | .rcu_blocked_node = NULL, |
@@ -94,7 +107,8 @@ extern struct group_info init_groups; | |||
94 | .rcu_read_lock_nesting = 0, \ | 107 | .rcu_read_lock_nesting = 0, \ |
95 | .rcu_read_unlock_special = 0, \ | 108 | .rcu_read_unlock_special = 0, \ |
96 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ | 109 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ |
97 | INIT_TASK_RCU_TREE_PREEMPT() | 110 | INIT_TASK_RCU_TREE_PREEMPT() \ |
111 | INIT_TASK_RCU_BOOST() | ||
98 | #else | 112 | #else |
99 | #define INIT_TASK_RCU_PREEMPT(tsk) | 113 | #define INIT_TASK_RCU_PREEMPT(tsk) |
100 | #endif | 114 | #endif |
@@ -137,7 +151,7 @@ extern struct cred init_cred; | |||
137 | .nr_cpus_allowed = NR_CPUS, \ | 151 | .nr_cpus_allowed = NR_CPUS, \ |
138 | }, \ | 152 | }, \ |
139 | .tasks = LIST_HEAD_INIT(tsk.tasks), \ | 153 | .tasks = LIST_HEAD_INIT(tsk.tasks), \ |
140 | .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \ | 154 | INIT_PUSHABLE_TASKS(tsk) \ |
141 | .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ | 155 | .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ |
142 | .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ | 156 | .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ |
143 | .real_parent = &tsk, \ | 157 | .real_parent = &tsk, \ |
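
Annotation: INIT_PUSHABLE_TASKS() and INIT_TASK_RCU_BOOST() follow the kernel's conditional-initializer pattern: an #ifdef'd macro expands to a designated-initializer fragment carrying its own trailing comma, so the config-off expansion leaves no stray punctuation in INIT_TASK(). A hypothetical miniature:

    #ifdef CONFIG_FEATURE_X
    # define INIT_FEATURE_X(obj)    .x_count = 0,
    #else
    # define INIT_FEATURE_X(obj)
    #endif

    #define INIT_OBJ(obj) {             \
        .refs = 1,                      \
        INIT_FEATURE_X(obj)             \
        .name = "init",                 \
    }
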
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 79d0c4f6d071..55e0d4253e49 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); | |||
114 | struct irqaction { | 114 | struct irqaction { |
115 | irq_handler_t handler; | 115 | irq_handler_t handler; |
116 | unsigned long flags; | 116 | unsigned long flags; |
117 | const char *name; | ||
118 | void *dev_id; | 117 | void *dev_id; |
119 | struct irqaction *next; | 118 | struct irqaction *next; |
120 | int irq; | 119 | int irq; |
121 | struct proc_dir_entry *dir; | ||
122 | irq_handler_t thread_fn; | 120 | irq_handler_t thread_fn; |
123 | struct task_struct *thread; | 121 | struct task_struct *thread; |
124 | unsigned long thread_flags; | 122 | unsigned long thread_flags; |
125 | }; | 123 | const char *name; |
124 | struct proc_dir_entry *dir; | ||
125 | } ____cacheline_internodealigned_in_smp; | ||
126 | 126 | ||
127 | extern irqreturn_t no_action(int cpl, void *dev_id); | 127 | extern irqreturn_t no_action(int cpl, void *dev_id); |
128 | 128 | ||
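
Annotation: the irqaction reshuffle is a cache-layout change: fields read on every interrupt (handler, flags, dev_id, ...) lead the struct, setup-and-/proc-only fields (name, dir) trail, and ____cacheline_internodealigned_in_smp keeps two irqactions from false-sharing a line. The pattern, with invented field names:

    #include <linux/cache.h>

    struct hot_cold {
        void (*handler)(void *);    /* hot: touched per event */
        unsigned long flags;        /* hot */
        void *dev_id;               /* hot */
        const char *name;           /* cold: registration, /proc */
    } ____cacheline_internodealigned_in_smp;
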
diff --git a/include/linux/module.h b/include/linux/module.h index 7575bbbdf2a2..8b17fd8c790d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
@@ -308,6 +308,9 @@ struct module | |||
308 | /* The size of the executable code in each section. */ | 308 | /* The size of the executable code in each section. */ |
309 | unsigned int init_text_size, core_text_size; | 309 | unsigned int init_text_size, core_text_size; |
310 | 310 | ||
311 | /* Size of RO sections of the module (text+rodata) */ | ||
312 | unsigned int init_ro_size, core_ro_size; | ||
313 | |||
311 | /* Arch-specific module values */ | 314 | /* Arch-specific module values */ |
312 | struct mod_arch_specific arch; | 315 | struct mod_arch_specific arch; |
313 | 316 | ||
@@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) | |||
672 | { | 675 | { |
673 | return 0; | 676 | return 0; |
674 | } | 677 | } |
675 | |||
676 | #endif /* CONFIG_MODULES */ | 678 | #endif /* CONFIG_MODULES */ |
677 | 679 | ||
678 | #ifdef CONFIG_SYSFS | 680 | #ifdef CONFIG_SYSFS |
@@ -687,6 +689,13 @@ extern int module_sysfs_initialized; | |||
687 | 689 | ||
688 | #define __MODULE_STRING(x) __stringify(x) | 690 | #define __MODULE_STRING(x) __stringify(x) |
689 | 691 | ||
692 | #ifdef CONFIG_DEBUG_SET_MODULE_RONX | ||
693 | extern void set_all_modules_text_rw(void); | ||
694 | extern void set_all_modules_text_ro(void); | ||
695 | #else | ||
696 | static inline void set_all_modules_text_rw(void) { } | ||
697 | static inline void set_all_modules_text_ro(void) { } | ||
698 | #endif | ||
690 | 699 | ||
691 | #ifdef CONFIG_GENERIC_BUG | 700 | #ifdef CONFIG_GENERIC_BUG |
692 | void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, | 701 | void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, |
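
Annotation: set_all_modules_text_rw()/_ro() use the usual config-off stub idiom: extern declarations when CONFIG_DEBUG_SET_MODULE_RONX is set, empty static inlines otherwise, so callers never need an #ifdef and the compiler deletes the dead calls. The idiom with hypothetical names:

    #ifdef CONFIG_FEATURE_Y
    extern void feature_y_apply(void);
    #else
    static inline void feature_y_apply(void) { }    /* compiles away */
    #endif
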
diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f363bc8fdc74..94b48bd40dd7 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h | |||
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock); | |||
160 | extern void mutex_unlock(struct mutex *lock); | 160 | extern void mutex_unlock(struct mutex *lock); |
161 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | 161 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); |
162 | 162 | ||
163 | #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX | ||
164 | #define arch_mutex_cpu_relax() cpu_relax() | ||
165 | #endif | ||
166 | |||
163 | #endif | 167 | #endif |
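
Annotation: the #ifndef default lets an architecture whose cpu_relax() is expensive in tight in-kernel spin loops substitute a cheaper arch_mutex_cpu_relax(); everyone else transparently gets cpu_relax(). Spin-side usage, with struct mutex_like and try_take() as invented stand-ins:

    struct mutex_like;                          /* invented */
    int try_take(struct mutex_like *lock);      /* invented */

    static void spin_until_taken(struct mutex_like *lock)
    {
        while (!try_take(lock))
            arch_mutex_cpu_relax();     /* cpu_relax() unless overridden */
    }
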
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index f31ef61f1c65..2dea94fc4402 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h | |||
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
241 | #define list_first_entry_rcu(ptr, type, member) \ | 241 | #define list_first_entry_rcu(ptr, type, member) \ |
242 | list_entry_rcu((ptr)->next, type, member) | 242 | list_entry_rcu((ptr)->next, type, member) |
243 | 243 | ||
244 | #define __list_for_each_rcu(pos, head) \ | ||
245 | for (pos = rcu_dereference_raw(list_next_rcu(head)); \ | ||
246 | pos != (head); \ | ||
247 | pos = rcu_dereference_raw(list_next_rcu((pos))) | ||
248 | |||
249 | /** | 244 | /** |
250 | * list_for_each_entry_rcu - iterate over rcu list of given type | 245 | * list_for_each_entry_rcu - iterate over rcu list of given type |
251 | * @pos: the type * to use as a loop cursor. | 246 | * @pos: the type * to use as a loop cursor. |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03cda7bed985..af5614856285 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -47,6 +47,8 @@ | |||
47 | extern int rcutorture_runnable; /* for sysctl */ | 47 | extern int rcutorture_runnable; /* for sysctl */ |
48 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 48 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
49 | 49 | ||
50 | #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) | ||
51 | #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) | ||
50 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 52 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
51 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | 53 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) |
52 | 54 | ||
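
Annotation: UINT_CMP_GE()/UINT_CMP_LT() extend the existing ULONG half-range trick to 32-bit counters: the subtraction wraps modulo 2^32, so the comparison stays correct across the 0xffffffff -> 0 rollover as long as the two counters are within 2^31 of each other. A worked example:

    /* a = 2, b = 0xfffffffe: a has wrapped four ticks past b.
     * (a) - (b) == 4 (mod 2^32), and 4 <= UINT_MAX/2, hence:
     *     UINT_CMP_GE(2u, 0xfffffffeu) -> true   (a is "ahead")
     *     UINT_CMP_GE(0xfffffffeu, 2u) -> false
     */
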
@@ -66,7 +68,6 @@ extern void call_rcu_sched(struct rcu_head *head, | |||
66 | extern void synchronize_sched(void); | 68 | extern void synchronize_sched(void); |
67 | extern void rcu_barrier_bh(void); | 69 | extern void rcu_barrier_bh(void); |
68 | extern void rcu_barrier_sched(void); | 70 | extern void rcu_barrier_sched(void); |
69 | extern void synchronize_sched_expedited(void); | ||
70 | extern int sched_expedited_torture_stats(char *page); | 71 | extern int sched_expedited_torture_stats(char *page); |
71 | 72 | ||
72 | static inline void __rcu_read_lock_bh(void) | 73 | static inline void __rcu_read_lock_bh(void) |
@@ -118,7 +119,6 @@ static inline int rcu_preempt_depth(void) | |||
118 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | 119 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
119 | 120 | ||
120 | /* Internal to kernel */ | 121 | /* Internal to kernel */ |
121 | extern void rcu_init(void); | ||
122 | extern void rcu_sched_qs(int cpu); | 122 | extern void rcu_sched_qs(int cpu); |
123 | extern void rcu_bh_qs(int cpu); | 123 | extern void rcu_bh_qs(int cpu); |
124 | extern void rcu_check_callbacks(int cpu, int user); | 124 | extern void rcu_check_callbacks(int cpu, int user); |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 13877cb93a60..30ebd7c8d874 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
@@ -27,7 +27,9 @@ | |||
27 | 27 | ||
28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
29 | 29 | ||
30 | #define rcu_init_sched() do { } while (0) | 30 | static inline void rcu_init(void) |
31 | { | ||
32 | } | ||
31 | 33 | ||
32 | #ifdef CONFIG_TINY_RCU | 34 | #ifdef CONFIG_TINY_RCU |
33 | 35 | ||
@@ -58,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |||
58 | synchronize_sched(); | 60 | synchronize_sched(); |
59 | } | 61 | } |
60 | 62 | ||
63 | static inline void synchronize_sched_expedited(void) | ||
64 | { | ||
65 | synchronize_sched(); | ||
66 | } | ||
67 | |||
61 | #ifdef CONFIG_TINY_RCU | 68 | #ifdef CONFIG_TINY_RCU |
62 | 69 | ||
63 | static inline void rcu_preempt_note_context_switch(void) | 70 | static inline void rcu_preempt_note_context_switch(void) |
@@ -125,16 +132,12 @@ static inline void rcu_cpu_stall_reset(void) | |||
125 | } | 132 | } |
126 | 133 | ||
127 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 134 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
128 | |||
129 | extern int rcu_scheduler_active __read_mostly; | 135 | extern int rcu_scheduler_active __read_mostly; |
130 | extern void rcu_scheduler_starting(void); | 136 | extern void rcu_scheduler_starting(void); |
131 | |||
132 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 137 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
133 | |||
134 | static inline void rcu_scheduler_starting(void) | 138 | static inline void rcu_scheduler_starting(void) |
135 | { | 139 | { |
136 | } | 140 | } |
137 | |||
138 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 141 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
139 | 142 | ||
140 | #endif /* __LINUX_RCUTINY_H */ | 143 | #endif /* __LINUX_RCUTINY_H */ |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 95518e628794..3a933482734a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #ifndef __LINUX_RCUTREE_H | 30 | #ifndef __LINUX_RCUTREE_H |
31 | #define __LINUX_RCUTREE_H | 31 | #define __LINUX_RCUTREE_H |
32 | 32 | ||
33 | extern void rcu_init(void); | ||
33 | extern void rcu_note_context_switch(int cpu); | 34 | extern void rcu_note_context_switch(int cpu); |
34 | extern int rcu_needs_cpu(int cpu); | 35 | extern int rcu_needs_cpu(int cpu); |
35 | extern void rcu_cpu_stall_reset(void); | 36 | extern void rcu_cpu_stall_reset(void); |
@@ -47,6 +48,7 @@ static inline void exit_rcu(void) | |||
47 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 48 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
48 | 49 | ||
49 | extern void synchronize_rcu_bh(void); | 50 | extern void synchronize_rcu_bh(void); |
51 | extern void synchronize_sched_expedited(void); | ||
50 | extern void synchronize_rcu_expedited(void); | 52 | extern void synchronize_rcu_expedited(void); |
51 | 53 | ||
52 | static inline void synchronize_rcu_bh_expedited(void) | 54 | static inline void synchronize_rcu_bh_expedited(void) |
diff --git a/include/linux/sched.h b/include/linux/sched.h index a99d735db3df..777cd01e240e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -513,6 +513,8 @@ struct thread_group_cputimer { | |||
513 | spinlock_t lock; | 513 | spinlock_t lock; |
514 | }; | 514 | }; |
515 | 515 | ||
516 | struct autogroup; | ||
517 | |||
516 | /* | 518 | /* |
517 | * NOTE! "signal_struct" does not have it's own | 519 | * NOTE! "signal_struct" does not have it's own |
518 | * locking, because a shared signal_struct always | 520 | * locking, because a shared signal_struct always |
@@ -580,6 +582,9 @@ struct signal_struct { | |||
580 | 582 | ||
581 | struct tty_struct *tty; /* NULL if no tty */ | 583 | struct tty_struct *tty; /* NULL if no tty */ |
582 | 584 | ||
585 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
586 | struct autogroup *autogroup; | ||
587 | #endif | ||
583 | /* | 588 | /* |
584 | * Cumulative resource counters for dead threads in the group, | 589 | * Cumulative resource counters for dead threads in the group, |
585 | * and for reaped dead child processes forked by this group. | 590 | * and for reaped dead child processes forked by this group. |
@@ -1233,13 +1238,18 @@ struct task_struct { | |||
1233 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1238 | #ifdef CONFIG_TREE_PREEMPT_RCU |
1234 | struct rcu_node *rcu_blocked_node; | 1239 | struct rcu_node *rcu_blocked_node; |
1235 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1240 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1241 | #ifdef CONFIG_RCU_BOOST | ||
1242 | struct rt_mutex *rcu_boost_mutex; | ||
1243 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
1236 | 1244 | ||
1237 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1245 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1238 | struct sched_info sched_info; | 1246 | struct sched_info sched_info; |
1239 | #endif | 1247 | #endif |
1240 | 1248 | ||
1241 | struct list_head tasks; | 1249 | struct list_head tasks; |
1250 | #ifdef CONFIG_SMP | ||
1242 | struct plist_node pushable_tasks; | 1251 | struct plist_node pushable_tasks; |
1252 | #endif | ||
1243 | 1253 | ||
1244 | struct mm_struct *mm, *active_mm; | 1254 | struct mm_struct *mm, *active_mm; |
1245 | #if defined(SPLIT_RSS_COUNTING) | 1255 | #if defined(SPLIT_RSS_COUNTING) |
@@ -1763,7 +1773,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * | |||
1763 | #ifdef CONFIG_PREEMPT_RCU | 1773 | #ifdef CONFIG_PREEMPT_RCU |
1764 | 1774 | ||
1765 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ | 1775 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ |
1766 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ | 1776 | #define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ |
1777 | #define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ | ||
1767 | 1778 | ||
1768 | static inline void rcu_copy_process(struct task_struct *p) | 1779 | static inline void rcu_copy_process(struct task_struct *p) |
1769 | { | 1780 | { |
@@ -1771,7 +1782,10 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1771 | p->rcu_read_unlock_special = 0; | 1782 | p->rcu_read_unlock_special = 0; |
1772 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1783 | #ifdef CONFIG_TREE_PREEMPT_RCU |
1773 | p->rcu_blocked_node = NULL; | 1784 | p->rcu_blocked_node = NULL; |
1774 | #endif | 1785 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1786 | #ifdef CONFIG_RCU_BOOST | ||
1787 | p->rcu_boost_mutex = NULL; | ||
1788 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
1775 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1789 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1776 | } | 1790 | } |
1777 | 1791 | ||
@@ -1876,14 +1890,11 @@ extern void sched_clock_idle_sleep_event(void); | |||
1876 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); | 1890 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
1877 | 1891 | ||
1878 | #ifdef CONFIG_HOTPLUG_CPU | 1892 | #ifdef CONFIG_HOTPLUG_CPU |
1879 | extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); | ||
1880 | extern void idle_task_exit(void); | 1893 | extern void idle_task_exit(void); |
1881 | #else | 1894 | #else |
1882 | static inline void idle_task_exit(void) {} | 1895 | static inline void idle_task_exit(void) {} |
1883 | #endif | 1896 | #endif |
1884 | 1897 | ||
1885 | extern void sched_idle_next(void); | ||
1886 | |||
1887 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | 1898 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) |
1888 | extern void wake_up_idle_cpu(int cpu); | 1899 | extern void wake_up_idle_cpu(int cpu); |
1889 | #else | 1900 | #else |
@@ -1893,8 +1904,6 @@ static inline void wake_up_idle_cpu(int cpu) { } | |||
1893 | extern unsigned int sysctl_sched_latency; | 1904 | extern unsigned int sysctl_sched_latency; |
1894 | extern unsigned int sysctl_sched_min_granularity; | 1905 | extern unsigned int sysctl_sched_min_granularity; |
1895 | extern unsigned int sysctl_sched_wakeup_granularity; | 1906 | extern unsigned int sysctl_sched_wakeup_granularity; |
1896 | extern unsigned int sysctl_sched_shares_ratelimit; | ||
1897 | extern unsigned int sysctl_sched_shares_thresh; | ||
1898 | extern unsigned int sysctl_sched_child_runs_first; | 1907 | extern unsigned int sysctl_sched_child_runs_first; |
1899 | 1908 | ||
1900 | enum sched_tunable_scaling { | 1909 | enum sched_tunable_scaling { |
@@ -1910,6 +1919,7 @@ extern unsigned int sysctl_sched_migration_cost; | |||
1910 | extern unsigned int sysctl_sched_nr_migrate; | 1919 | extern unsigned int sysctl_sched_nr_migrate; |
1911 | extern unsigned int sysctl_sched_time_avg; | 1920 | extern unsigned int sysctl_sched_time_avg; |
1912 | extern unsigned int sysctl_timer_migration; | 1921 | extern unsigned int sysctl_timer_migration; |
1922 | extern unsigned int sysctl_sched_shares_window; | ||
1913 | 1923 | ||
1914 | int sched_proc_update_handler(struct ctl_table *table, int write, | 1924 | int sched_proc_update_handler(struct ctl_table *table, int write, |
1915 | void __user *buffer, size_t *length, | 1925 | void __user *buffer, size_t *length, |
@@ -1935,6 +1945,24 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
1935 | 1945 | ||
1936 | extern unsigned int sysctl_sched_compat_yield; | 1946 | extern unsigned int sysctl_sched_compat_yield; |
1937 | 1947 | ||
1948 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
1949 | extern unsigned int sysctl_sched_autogroup_enabled; | ||
1950 | |||
1951 | extern void sched_autogroup_create_attach(struct task_struct *p); | ||
1952 | extern void sched_autogroup_detach(struct task_struct *p); | ||
1953 | extern void sched_autogroup_fork(struct signal_struct *sig); | ||
1954 | extern void sched_autogroup_exit(struct signal_struct *sig); | ||
1955 | #ifdef CONFIG_PROC_FS | ||
1956 | extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); | ||
1957 | extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); | ||
1958 | #endif | ||
1959 | #else | ||
1960 | static inline void sched_autogroup_create_attach(struct task_struct *p) { } | ||
1961 | static inline void sched_autogroup_detach(struct task_struct *p) { } | ||
1962 | static inline void sched_autogroup_fork(struct signal_struct *sig) { } | ||
1963 | static inline void sched_autogroup_exit(struct signal_struct *sig) { } | ||
1964 | #endif | ||
1965 | |||
1938 | #ifdef CONFIG_RT_MUTEXES | 1966 | #ifdef CONFIG_RT_MUTEXES |
1939 | extern int rt_mutex_getprio(struct task_struct *p); | 1967 | extern int rt_mutex_getprio(struct task_struct *p); |
1940 | extern void rt_mutex_setprio(struct task_struct *p, int prio); | 1968 | extern void rt_mutex_setprio(struct task_struct *p, int prio); |
@@ -1953,9 +1981,10 @@ extern int task_nice(const struct task_struct *p); | |||
1953 | extern int can_nice(const struct task_struct *p, const int nice); | 1981 | extern int can_nice(const struct task_struct *p, const int nice); |
1954 | extern int task_curr(const struct task_struct *p); | 1982 | extern int task_curr(const struct task_struct *p); |
1955 | extern int idle_cpu(int cpu); | 1983 | extern int idle_cpu(int cpu); |
1956 | extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); | 1984 | extern int sched_setscheduler(struct task_struct *, int, |
1985 | const struct sched_param *); | ||
1957 | extern int sched_setscheduler_nocheck(struct task_struct *, int, | 1986 | extern int sched_setscheduler_nocheck(struct task_struct *, int, |
1958 | struct sched_param *); | 1987 | const struct sched_param *); |
1959 | extern struct task_struct *idle_task(int cpu); | 1988 | extern struct task_struct *idle_task(int cpu); |
1960 | extern struct task_struct *curr_task(int cpu); | 1989 | extern struct task_struct *curr_task(int cpu); |
1961 | extern void set_curr_task(int cpu, struct task_struct *p); | 1990 | extern void set_curr_task(int cpu, struct task_struct *p); |
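The constification of sched_setscheduler() and sched_setscheduler_nocheck() above lets callers pass parameter blocks that live in read-only memory. A minimal sketch of what that enables, assuming kernel context; the helper name and priority value are illustrative, not part of the patch:

	/* Hypothetical caller: with the const-qualified parameter the
	 * sched_param can be static const (rodata) rather than a mutable
	 * stack copy. */
	static int boost_to_fifo(struct task_struct *p)
	{
		static const struct sched_param sp = { .sched_priority = 50 };

		return sched_setscheduler_nocheck(p, SCHED_FIFO, &sp);
	}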
diff --git a/include/linux/sfi.h b/include/linux/sfi.h index 7f770c638e99..fe817918b30e 100644 --- a/include/linux/sfi.h +++ b/include/linux/sfi.h | |||
@@ -77,6 +77,8 @@ | |||
77 | #define SFI_OEM_ID_SIZE 6 | 77 | #define SFI_OEM_ID_SIZE 6 |
78 | #define SFI_OEM_TABLE_ID_SIZE 8 | 78 | #define SFI_OEM_TABLE_ID_SIZE 8 |
79 | 79 | ||
80 | #define SFI_NAME_LEN 16 | ||
81 | |||
80 | #define SFI_SYST_SEARCH_BEGIN 0x000E0000 | 82 | #define SFI_SYST_SEARCH_BEGIN 0x000E0000 |
81 | #define SFI_SYST_SEARCH_END 0x000FFFFF | 83 | #define SFI_SYST_SEARCH_END 0x000FFFFF |
82 | 84 | ||
@@ -156,13 +158,13 @@ struct sfi_device_table_entry { | |||
156 | u16 addr; | 158 | u16 addr; |
157 | u8 irq; | 159 | u8 irq; |
158 | u32 max_freq; | 160 | u32 max_freq; |
159 | char name[16]; | 161 | char name[SFI_NAME_LEN]; |
160 | } __packed; | 162 | } __packed; |
161 | 163 | ||
162 | struct sfi_gpio_table_entry { | 164 | struct sfi_gpio_table_entry { |
163 | char controller_name[16]; | 165 | char controller_name[SFI_NAME_LEN]; |
164 | u16 pin_no; | 166 | u16 pin_no; |
165 | char pin_name[16]; | 167 | char pin_name[SFI_NAME_LEN]; |
166 | } __packed; | 168 | } __packed; |
167 | 169 | ||
168 | typedef int (*sfi_table_handler) (struct sfi_table_header *table); | 170 | typedef int (*sfi_table_handler) (struct sfi_table_header *table); |
diff --git a/include/linux/timer.h b/include/linux/timer.h index 38cf093ef62c..6abd9138beda 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h | |||
@@ -24,9 +24,9 @@ struct timer_list { | |||
24 | int slack; | 24 | int slack; |
25 | 25 | ||
26 | #ifdef CONFIG_TIMER_STATS | 26 | #ifdef CONFIG_TIMER_STATS |
27 | int start_pid; | ||
27 | void *start_site; | 28 | void *start_site; |
28 | char start_comm[16]; | 29 | char start_comm[16]; |
29 | int start_pid; | ||
30 | #endif | 30 | #endif |
31 | #ifdef CONFIG_LOCKDEP | 31 | #ifdef CONFIG_LOCKDEP |
32 | struct lockdep_map lockdep_map; | 32 | struct lockdep_map lockdep_map; |
@@ -48,12 +48,38 @@ extern struct tvec_base boot_tvec_bases; | |||
48 | #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) | 48 | #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) |
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | /* | ||
52 | * Note that all tvec_bases are 2 byte aligned and lower bit of | ||
53 | * base in timer_list is guaranteed to be zero. Use the LSB to | ||
54 | * indicate whether the timer is deferrable. | ||
55 | * | ||
56 | * A deferrable timer will work normally when the system is busy, but | ||
57 | * will not cause a CPU to come out of idle just to service it; instead, | ||
58 | * the timer will be serviced when the CPU eventually wakes up with a | ||
59 | * subsequent non-deferrable timer. | ||
60 | */ | ||
61 | #define TBASE_DEFERRABLE_FLAG (0x1) | ||
62 | |||
51 | #define TIMER_INITIALIZER(_function, _expires, _data) { \ | 63 | #define TIMER_INITIALIZER(_function, _expires, _data) { \ |
52 | .entry = { .prev = TIMER_ENTRY_STATIC }, \ | 64 | .entry = { .prev = TIMER_ENTRY_STATIC }, \ |
53 | .function = (_function), \ | 65 | .function = (_function), \ |
54 | .expires = (_expires), \ | 66 | .expires = (_expires), \ |
55 | .data = (_data), \ | 67 | .data = (_data), \ |
56 | .base = &boot_tvec_bases, \ | 68 | .base = &boot_tvec_bases, \ |
69 | .slack = -1, \ | ||
70 | __TIMER_LOCKDEP_MAP_INITIALIZER( \ | ||
71 | __FILE__ ":" __stringify(__LINE__)) \ | ||
72 | } | ||
73 | |||
74 | #define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ | ||
75 | ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) | ||
76 | |||
77 | #define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ | ||
78 | .entry = { .prev = TIMER_ENTRY_STATIC }, \ | ||
79 | .function = (_function), \ | ||
80 | .expires = (_expires), \ | ||
81 | .data = (_data), \ | ||
82 | .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ | ||
57 | __TIMER_LOCKDEP_MAP_INITIALIZER( \ | 83 | __TIMER_LOCKDEP_MAP_INITIALIZER( \ |
58 | __FILE__ ":" __stringify(__LINE__)) \ | 84 | __FILE__ ":" __stringify(__LINE__)) \ |
59 | } | 85 | } |
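Per the comment above, the low bit of the tvec_base pointer marks a timer deferrable, and TIMER_DEFERRED_INITIALIZER simply bakes that tag into a static initializer. A usage sketch, assuming kernel context; the callback and one-second period are illustrative:

	static void my_poll(unsigned long data);

	/* Statically initialized deferrable timer: .base has its low bit set. */
	static struct timer_list my_timer =
		TIMER_DEFERRED_INITIALIZER(my_poll, 0, 0);

	static void my_poll(unsigned long data)
	{
		/* housekeeping that should not pull an idle CPU out of sleep */
		mod_timer(&my_timer, jiffies + HZ);
	}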
@@ -248,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) | |||
248 | 274 | ||
249 | extern void add_timer(struct timer_list *timer); | 275 | extern void add_timer(struct timer_list *timer); |
250 | 276 | ||
277 | extern int try_to_del_timer_sync(struct timer_list *timer); | ||
278 | |||
251 | #ifdef CONFIG_SMP | 279 | #ifdef CONFIG_SMP |
252 | extern int try_to_del_timer_sync(struct timer_list *timer); | ||
253 | extern int del_timer_sync(struct timer_list *timer); | 280 | extern int del_timer_sync(struct timer_list *timer); |
254 | #else | 281 | #else |
255 | # define try_to_del_timer_sync(t) del_timer(t) | ||
256 | # define del_timer_sync(t) del_timer(t) | 282 | # define del_timer_sync(t) del_timer(t) |
257 | #endif | 283 | #endif |
258 | 284 | ||
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h new file mode 100644 index 000000000000..d24aabaca474 --- /dev/null +++ b/include/linux/timerqueue.h | |||
@@ -0,0 +1,50 @@ | |||
1 | #ifndef _LINUX_TIMERQUEUE_H | ||
2 | #define _LINUX_TIMERQUEUE_H | ||
3 | |||
4 | #include <linux/rbtree.h> | ||
5 | #include <linux/ktime.h> | ||
6 | |||
7 | |||
8 | struct timerqueue_node { | ||
9 | struct rb_node node; | ||
10 | ktime_t expires; | ||
11 | }; | ||
12 | |||
13 | struct timerqueue_head { | ||
14 | struct rb_root head; | ||
15 | struct timerqueue_node *next; | ||
16 | }; | ||
17 | |||
18 | |||
19 | extern void timerqueue_add(struct timerqueue_head *head, | ||
20 | struct timerqueue_node *node); | ||
21 | extern void timerqueue_del(struct timerqueue_head *head, | ||
22 | struct timerqueue_node *node); | ||
23 | extern struct timerqueue_node *timerqueue_iterate_next( | ||
24 | struct timerqueue_node *node); | ||
25 | |||
26 | /** | ||
27 | * timerqueue_getnext - Returns the timer with the earliest expiration time | ||
28 | * | ||
29 | * @head: head of timerqueue | ||
30 | * | ||
31 | * Returns a pointer to the timer node that has the | ||
32 | * earliest expiration time. | ||
33 | */ | ||
34 | static inline | ||
35 | struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) | ||
36 | { | ||
37 | return head->next; | ||
38 | } | ||
39 | |||
40 | static inline void timerqueue_init(struct timerqueue_node *node) | ||
41 | { | ||
42 | RB_CLEAR_NODE(&node->node); | ||
43 | } | ||
44 | |||
45 | static inline void timerqueue_init_head(struct timerqueue_head *head) | ||
46 | { | ||
47 | head->head = RB_ROOT; | ||
48 | head->next = NULL; | ||
49 | } | ||
50 | #endif /* _LINUX_TIMERQUEUE_H */ | ||
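Based only on the header above, a usage sketch: timerqueue_add()/timerqueue_del() keep the rbtree ordered and maintain the cached earliest node, so timerqueue_getnext() is O(1). Names below are illustrative and kernel context is assumed:

	static struct timerqueue_head tq;
	static struct timerqueue_node ev;

	static void timerqueue_example(void)
	{
		struct timerqueue_node *first;

		timerqueue_init_head(&tq);
		timerqueue_init(&ev);
		ev.expires = ktime_set(1, 0);		/* expires in one second */
		timerqueue_add(&tq, &ev);

		first = timerqueue_getnext(&tq);	/* cached earliest node */
		if (first)
			timerqueue_del(&tq, first);
	}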
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d3e4f87e95c0..c6814616653b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -32,7 +32,7 @@ struct tracepoint { | |||
32 | int state; /* State. */ | 32 | int state; /* State. */ |
33 | void (*regfunc)(void); | 33 | void (*regfunc)(void); |
34 | void (*unregfunc)(void); | 34 | void (*unregfunc)(void); |
35 | struct tracepoint_func *funcs; | 35 | struct tracepoint_func __rcu *funcs; |
36 | } __attribute__((aligned(32))); /* | 36 | } __attribute__((aligned(32))); /* |
37 | * Aligned on 32 bytes because it is | 37 | * Aligned on 32 bytes because it is |
38 | * globally visible and gcc happily | 38 | * globally visible and gcc happily |
@@ -326,7 +326,7 @@ do_trace: \ | |||
326 | * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); | 326 | * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); |
327 | * __entry->next_pid = next->pid; | 327 | * __entry->next_pid = next->pid; |
328 | * __entry->next_prio = next->prio; | 328 | * __entry->next_prio = next->prio; |
329 | * ) | 329 | * ), |
330 | * | 330 | * |
331 | * * | 331 | * * |
332 | * * Formatted output of a trace record via TP_printk(). | 332 | * * Formatted output of a trace record via TP_printk(). |
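The __rcu annotation added to tracepoint->funcs above is a sparse marker rather than a behavioral change: the checker now warns unless the pointer is read via rcu_dereference() and published via rcu_assign_pointer(). A generic sketch of the idiom with a hypothetical struct (not the tracepoint internals):

	struct cb {
		void (*fn)(void *arg);
		void *arg;
	};

	static struct cb __rcu *active_cb;	/* sparse checks every access */

	static void set_active(struct cb *new)
	{
		rcu_assign_pointer(active_cb, new);	/* annotated publish */
	}

	static void invoke_active(void)
	{
		struct cb *c;

		rcu_read_lock();
		c = rcu_dereference(active_cb);		/* annotated load */
		if (c)
			c->fn(c->arg);
		rcu_read_unlock();
	}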
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0c0771f06bfa..bd257fee6031 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h | |||
@@ -127,12 +127,20 @@ struct execute_work { | |||
127 | .timer = TIMER_INITIALIZER(NULL, 0, 0), \ | 127 | .timer = TIMER_INITIALIZER(NULL, 0, 0), \ |
128 | } | 128 | } |
129 | 129 | ||
130 | #define __DEFERRED_WORK_INITIALIZER(n, f) { \ | ||
131 | .work = __WORK_INITIALIZER((n).work, (f)), \ | ||
132 | .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ | ||
133 | } | ||
134 | |||
130 | #define DECLARE_WORK(n, f) \ | 135 | #define DECLARE_WORK(n, f) \ |
131 | struct work_struct n = __WORK_INITIALIZER(n, f) | 136 | struct work_struct n = __WORK_INITIALIZER(n, f) |
132 | 137 | ||
133 | #define DECLARE_DELAYED_WORK(n, f) \ | 138 | #define DECLARE_DELAYED_WORK(n, f) \ |
134 | struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) | 139 | struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) |
135 | 140 | ||
141 | #define DECLARE_DEFERRED_WORK(n, f) \ | ||
142 | struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) | ||
143 | |||
136 | /* | 144 | /* |
137 | * initialize a work item's function pointer | 145 | * initialize a work item's function pointer |
138 | */ | 146 | */ |
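DECLARE_DEFERRED_WORK() above pairs the deferrable timer initializer with an ordinary delayed_work, so the work's timer will not wake an idle CPU. A usage sketch; the handler name and interval are illustrative:

	static void gc_fn(struct work_struct *work);
	static DECLARE_DEFERRED_WORK(gc_work, gc_fn);

	static void gc_fn(struct work_struct *work)
	{
		/* periodic housekeeping, deferrable across idle periods */
		schedule_delayed_work(&gc_work, round_jiffies_relative(HZ));
	}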
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index b0b4eb24d592..da39b22636f7 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h | |||
@@ -21,6 +21,16 @@ | |||
21 | #undef CREATE_TRACE_POINTS | 21 | #undef CREATE_TRACE_POINTS |
22 | 22 | ||
23 | #include <linux/stringify.h> | 23 | #include <linux/stringify.h> |
24 | /* | ||
25 | * module.h includes tracepoints, and because ftrace.h | ||
26 | * pulls in module.h: | ||
27 | * trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h -> | ||
28 | * linux/ftrace.h -> linux/module.h | ||
29 | * we must include module.h here before we play with any of | ||
30 | * the TRACE_EVENT() macros, otherwise the tracepoints included | ||
31 | * by module.h may break the build. | ||
32 | */ | ||
33 | #include <linux/module.h> | ||
24 | 34 | ||
25 | #undef TRACE_EVENT | 35 | #undef TRACE_EVENT |
26 | #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ | 36 | #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ |
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 75ce9d500d8e..f10293c41b1e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h | |||
@@ -25,9 +25,7 @@ TRACE_EVENT(kfree_skb, | |||
25 | 25 | ||
26 | TP_fast_assign( | 26 | TP_fast_assign( |
27 | __entry->skbaddr = skb; | 27 | __entry->skbaddr = skb; |
28 | if (skb) { | 28 | __entry->protocol = ntohs(skb->protocol); |
29 | __entry->protocol = ntohs(skb->protocol); | ||
30 | } | ||
31 | __entry->location = location; | 29 | __entry->location = location; |
32 | ), | 30 | ), |
33 | 31 | ||
diff --git a/init/Kconfig b/init/Kconfig index c9728992a776..8dfd094e6875 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -393,7 +393,6 @@ config PREEMPT_RCU | |||
393 | 393 | ||
394 | config RCU_TRACE | 394 | config RCU_TRACE |
395 | bool "Enable tracing for RCU" | 395 | bool "Enable tracing for RCU" |
396 | depends on TREE_RCU || TREE_PREEMPT_RCU | ||
397 | help | 396 | help |
398 | This option provides tracing in RCU which presents stats | 397 | This option provides tracing in RCU which presents stats |
399 | in debugfs for debugging RCU implementation. | 398 | in debugfs for debugging RCU implementation. |
@@ -459,6 +458,60 @@ config TREE_RCU_TRACE | |||
459 | TREE_PREEMPT_RCU implementations, permitting Makefile to | 458 | TREE_PREEMPT_RCU implementations, permitting Makefile to |
460 | trivially select kernel/rcutree_trace.c. | 459 | trivially select kernel/rcutree_trace.c. |
461 | 460 | ||
461 | config RCU_BOOST | ||
462 | bool "Enable RCU priority boosting" | ||
463 | depends on RT_MUTEXES && TINY_PREEMPT_RCU | ||
464 | default n | ||
465 | help | ||
466 | This option boosts the priority of preempted RCU readers that | ||
467 | block the current preemptible RCU grace period for too long. | ||
468 | This option also prevents heavy loads from blocking RCU | ||
469 | callback invocation for all flavors of RCU. | ||
470 | |||
471 | Say Y here if you are working with real-time apps or heavy loads. | ||
472 | Say N here if you are unsure. | ||
473 | |||
474 | config RCU_BOOST_PRIO | ||
475 | int "Real-time priority to boost RCU readers to" | ||
476 | range 1 99 | ||
477 | depends on RCU_BOOST | ||
478 | default 1 | ||
479 | help | ||
480 | This option specifies the real-time priority to which preempted | ||
481 | RCU readers are to be boosted. If you are working with CPU-bound | ||
482 | real-time applications, you should specify a priority higher than | ||
483 | the highest-priority CPU-bound application. | ||
484 | |||
485 | Specify the real-time priority, or take the default if unsure. | ||
486 | |||
487 | config RCU_BOOST_DELAY | ||
488 | int "Milliseconds to delay boosting after RCU grace-period start" | ||
489 | range 0 3000 | ||
490 | depends on RCU_BOOST | ||
491 | default 500 | ||
492 | help | ||
493 | This option specifies the time to wait after the beginning of | ||
494 | a given grace period before priority-boosting preempted RCU | ||
495 | readers blocking that grace period. Note that any RCU reader | ||
496 | blocking an expedited RCU grace period is boosted immediately. | ||
497 | |||
498 | Accept the default if unsure. | ||
499 | |||
500 | config SRCU_SYNCHRONIZE_DELAY | ||
501 | int "Microseconds to delay before waiting for readers" | ||
502 | range 0 20 | ||
503 | default 10 | ||
504 | help | ||
505 | This option controls how long SRCU delays before entering its | ||
506 | loop waiting on SRCU readers. The purpose of this loop is | ||
507 | to avoid the unconditional context-switch penalty that would | ||
508 | otherwise be incurred if there were an active SRCU reader, | ||
509 | in a manner similar to adaptive locking schemes. This should | ||
510 | be set to be a bit longer than the common-case SRCU read-side | ||
511 | critical-section overhead. | ||
512 | |||
513 | Accept the default if unsure. | ||
514 | |||
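A hypothetical sketch of the wait shape the SRCU_SYNCHRONIZE_DELAY help text describes: spin briefly first (cheaper than a context switch when readers are short-lived), then fall back to sleeping. The reader-check helper is an assumption, not the real SRCU internal:

	static void wait_for_readers(struct srcu_struct *sp, int idx)
	{
		udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);	/* adaptive spin */
		while (readers_active(sp, idx))		/* assumed helper */
			schedule_timeout_interruptible(1);
	}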
462 | endmenu # "RCU Subsystem" | 515 | endmenu # "RCU Subsystem" |
463 | 516 | ||
464 | config IKCONFIG | 517 | config IKCONFIG |
@@ -741,6 +794,19 @@ config NET_NS | |||
741 | 794 | ||
742 | endif # NAMESPACES | 795 | endif # NAMESPACES |
743 | 796 | ||
797 | config SCHED_AUTOGROUP | ||
798 | bool "Automatic process group scheduling" | ||
799 | select EVENTFD | ||
800 | select CGROUPS | ||
801 | select CGROUP_SCHED | ||
802 | select FAIR_GROUP_SCHED | ||
803 | help | ||
804 | This option optimizes the scheduler for common desktop workloads by | ||
805 | automatically creating and populating task groups. This separation | ||
806 | of workloads isolates aggressive CPU burners (like build jobs) from | ||
807 | desktop applications. Task group autogeneration is currently based | ||
808 | upon task session. | ||
809 | |||
744 | config MM_OWNER | 810 | config MM_OWNER |
745 | bool | 811 | bool |
746 | 812 | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 0b5ff083fa22..e0f2831634b4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ | |||
100 | obj-$(CONFIG_TRACING) += trace/ | 100 | obj-$(CONFIG_TRACING) += trace/ |
101 | obj-$(CONFIG_X86_DS) += trace/ | 101 | obj-$(CONFIG_X86_DS) += trace/ |
102 | obj-$(CONFIG_RING_BUFFER) += trace/ | 102 | obj-$(CONFIG_RING_BUFFER) += trace/ |
103 | obj-$(CONFIG_TRACEPOINTS) += trace/ | ||
103 | obj-$(CONFIG_SMP) += sched_cpupri.o | 104 | obj-$(CONFIG_SMP) += sched_cpupri.o |
104 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 105 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
105 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 106 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
diff --git a/kernel/cpu.c b/kernel/cpu.c index f6e726f18491..156cc5556140 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu) | |||
189 | } | 189 | } |
190 | 190 | ||
191 | struct take_cpu_down_param { | 191 | struct take_cpu_down_param { |
192 | struct task_struct *caller; | ||
193 | unsigned long mod; | 192 | unsigned long mod; |
194 | void *hcpu; | 193 | void *hcpu; |
195 | }; | 194 | }; |
@@ -198,7 +197,6 @@ struct take_cpu_down_param { | |||
198 | static int __ref take_cpu_down(void *_param) | 197 | static int __ref take_cpu_down(void *_param) |
199 | { | 198 | { |
200 | struct take_cpu_down_param *param = _param; | 199 | struct take_cpu_down_param *param = _param; |
201 | unsigned int cpu = (unsigned long)param->hcpu; | ||
202 | int err; | 200 | int err; |
203 | 201 | ||
204 | /* Ensure this CPU doesn't handle any more interrupts. */ | 202 | /* Ensure this CPU doesn't handle any more interrupts. */ |
@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param) | |||
208 | 206 | ||
209 | cpu_notify(CPU_DYING | param->mod, param->hcpu); | 207 | cpu_notify(CPU_DYING | param->mod, param->hcpu); |
210 | 208 | ||
211 | if (task_cpu(param->caller) == cpu) | ||
212 | move_task_off_dead_cpu(cpu, param->caller); | ||
213 | /* Force idle task to run as soon as we yield: it should | ||
214 | immediately notice cpu is offline and die quickly. */ | ||
215 | sched_idle_next(); | ||
216 | return 0; | 209 | return 0; |
217 | } | 210 | } |
218 | 211 | ||
@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
223 | void *hcpu = (void *)(long)cpu; | 216 | void *hcpu = (void *)(long)cpu; |
224 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | 217 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; |
225 | struct take_cpu_down_param tcd_param = { | 218 | struct take_cpu_down_param tcd_param = { |
226 | .caller = current, | ||
227 | .mod = mod, | 219 | .mod = mod, |
228 | .hcpu = hcpu, | 220 | .hcpu = hcpu, |
229 | }; | 221 | }; |
@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
253 | } | 245 | } |
254 | BUG_ON(cpu_online(cpu)); | 246 | BUG_ON(cpu_online(cpu)); |
255 | 247 | ||
256 | /* Wait for it to sleep (leaving idle task). */ | 248 | /* |
249 | * The migration_call() CPU_DYING callback will have removed all | ||
250 | * runnable tasks from the cpu; there's only the idle task left now | ||
251 | * that the migration thread is done doing the stop_machine thing. | ||
252 | * | ||
253 | * Wait for the stop thread to go away. | ||
254 | */ | ||
257 | while (!idle_cpu(cpu)) | 255 | while (!idle_cpu(cpu)) |
258 | yield(); | 256 | cpu_relax(); |
259 | 257 | ||
260 | /* This actually kills the CPU. */ | 258 | /* This actually kills the CPU. */ |
261 | __cpu_die(cpu); | 259 | __cpu_die(cpu); |
@@ -386,6 +384,14 @@ out: | |||
386 | #ifdef CONFIG_PM_SLEEP_SMP | 384 | #ifdef CONFIG_PM_SLEEP_SMP |
387 | static cpumask_var_t frozen_cpus; | 385 | static cpumask_var_t frozen_cpus; |
388 | 386 | ||
387 | void __weak arch_disable_nonboot_cpus_begin(void) | ||
388 | { | ||
389 | } | ||
390 | |||
391 | void __weak arch_disable_nonboot_cpus_end(void) | ||
392 | { | ||
393 | } | ||
394 | |||
389 | int disable_nonboot_cpus(void) | 395 | int disable_nonboot_cpus(void) |
390 | { | 396 | { |
391 | int cpu, first_cpu, error = 0; | 397 | int cpu, first_cpu, error = 0; |
@@ -397,6 +403,7 @@ int disable_nonboot_cpus(void) | |||
397 | * with the userspace trying to use the CPU hotplug at the same time | 403 | * with the userspace trying to use the CPU hotplug at the same time |
398 | */ | 404 | */ |
399 | cpumask_clear(frozen_cpus); | 405 | cpumask_clear(frozen_cpus); |
406 | arch_disable_nonboot_cpus_begin(); | ||
400 | 407 | ||
401 | printk("Disabling non-boot CPUs ...\n"); | 408 | printk("Disabling non-boot CPUs ...\n"); |
402 | for_each_online_cpu(cpu) { | 409 | for_each_online_cpu(cpu) { |
@@ -412,6 +419,8 @@ int disable_nonboot_cpus(void) | |||
412 | } | 419 | } |
413 | } | 420 | } |
414 | 421 | ||
422 | arch_disable_nonboot_cpus_end(); | ||
423 | |||
415 | if (!error) { | 424 | if (!error) { |
416 | BUG_ON(num_online_cpus() > 1); | 425 | BUG_ON(num_online_cpus() > 1); |
417 | /* Make sure the CPUs won't be enabled by someone else */ | 426 | /* Make sure the CPUs won't be enabled by someone else */ |
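The two __weak definitions above are override points: an architecture that needs to bracket the CPU-offline loop just supplies strong versions with the same signatures. A hypothetical arch-side override, purely to show the linkage pattern:

	/* In arch code; these strong definitions replace the __weak stubs. */
	void arch_disable_nonboot_cpus_begin(void)
	{
		/* e.g. quiesce arch state before CPUs are taken down */
	}

	void arch_disable_nonboot_cpus_end(void)
	{
		/* undo whatever _begin() set up */
	}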
diff --git a/kernel/fork.c b/kernel/fork.c index 5447dc7defa9..7d164e25b0f0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) | |||
174 | 174 | ||
175 | static inline void put_signal_struct(struct signal_struct *sig) | 175 | static inline void put_signal_struct(struct signal_struct *sig) |
176 | { | 176 | { |
177 | if (atomic_dec_and_test(&sig->sigcnt)) | 177 | if (atomic_dec_and_test(&sig->sigcnt)) { |
178 | sched_autogroup_exit(sig); | ||
178 | free_signal_struct(sig); | 179 | free_signal_struct(sig); |
180 | } | ||
179 | } | 181 | } |
180 | 182 | ||
181 | void __put_task_struct(struct task_struct *tsk) | 183 | void __put_task_struct(struct task_struct *tsk) |
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
905 | posix_cpu_timers_init_group(sig); | 907 | posix_cpu_timers_init_group(sig); |
906 | 908 | ||
907 | tty_audit_fork(sig); | 909 | tty_audit_fork(sig); |
910 | sched_autogroup_fork(sig); | ||
908 | 911 | ||
909 | sig->oom_adj = current->signal->oom_adj; | 912 | sig->oom_adj = current->signal->oom_adj; |
910 | sig->oom_score_adj = current->signal->oom_score_adj; | 913 | sig->oom_score_adj = current->signal->oom_score_adj; |
@@ -1315,7 +1318,7 @@ bad_fork_cleanup_mm: | |||
1315 | } | 1318 | } |
1316 | bad_fork_cleanup_signal: | 1319 | bad_fork_cleanup_signal: |
1317 | if (!(clone_flags & CLONE_THREAD)) | 1320 | if (!(clone_flags & CLONE_THREAD)) |
1318 | free_signal_struct(p->signal); | 1321 | put_signal_struct(p->signal); |
1319 | bad_fork_cleanup_sighand: | 1322 | bad_fork_cleanup_sighand: |
1320 | __cleanup_sighand(p->sighand); | 1323 | __cleanup_sighand(p->sighand); |
1321 | bad_fork_cleanup_fs: | 1324 | bad_fork_cleanup_fs: |
diff --git a/kernel/futex.c b/kernel/futex.c index 40a8777a27d0..3019b92e6917 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -69,6 +69,14 @@ int __read_mostly futex_cmpxchg_enabled; | |||
69 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) | 69 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * Futex flags used to encode options to functions and preserve them across | ||
73 | * restarts. | ||
74 | */ | ||
75 | #define FLAGS_SHARED 0x01 | ||
76 | #define FLAGS_CLOCKRT 0x02 | ||
77 | #define FLAGS_HAS_TIMEOUT 0x04 | ||
78 | |||
79 | /* | ||
72 | * Priority Inheritance state: | 80 | * Priority Inheritance state: |
73 | */ | 81 | */ |
74 | struct futex_pi_state { | 82 | struct futex_pi_state { |
@@ -123,6 +131,12 @@ struct futex_q { | |||
123 | u32 bitset; | 131 | u32 bitset; |
124 | }; | 132 | }; |
125 | 133 | ||
134 | static const struct futex_q futex_q_init = { | ||
135 | /* list gets initialized in queue_me() */ | ||
136 | .key = FUTEX_KEY_INIT, | ||
137 | .bitset = FUTEX_BITSET_MATCH_ANY | ||
138 | }; | ||
139 | |||
126 | /* | 140 | /* |
127 | * Hash buckets are shared by all the futex_keys that hash to the same | 141 | * Hash buckets are shared by all the futex_keys that hash to the same |
128 | * location. Each key may have multiple futex_q structures, one for each task | 142 | * location. Each key may have multiple futex_q structures, one for each task |
@@ -283,8 +297,7 @@ again: | |||
283 | return 0; | 297 | return 0; |
284 | } | 298 | } |
285 | 299 | ||
286 | static inline | 300 | static inline void put_futex_key(union futex_key *key) |
287 | void put_futex_key(int fshared, union futex_key *key) | ||
288 | { | 301 | { |
289 | drop_futex_key_refs(key); | 302 | drop_futex_key_refs(key); |
290 | } | 303 | } |
@@ -870,7 +883,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) | |||
870 | /* | 883 | /* |
871 | * Wake up waiters matching bitset queued on this futex (uaddr). | 884 | * Wake up waiters matching bitset queued on this futex (uaddr). |
872 | */ | 885 | */ |
873 | static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | 886 | static int |
887 | futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) | ||
874 | { | 888 | { |
875 | struct futex_hash_bucket *hb; | 889 | struct futex_hash_bucket *hb; |
876 | struct futex_q *this, *next; | 890 | struct futex_q *this, *next; |
@@ -881,7 +895,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
881 | if (!bitset) | 895 | if (!bitset) |
882 | return -EINVAL; | 896 | return -EINVAL; |
883 | 897 | ||
884 | ret = get_futex_key(uaddr, fshared, &key); | 898 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); |
885 | if (unlikely(ret != 0)) | 899 | if (unlikely(ret != 0)) |
886 | goto out; | 900 | goto out; |
887 | 901 | ||
@@ -907,7 +921,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
907 | } | 921 | } |
908 | 922 | ||
909 | spin_unlock(&hb->lock); | 923 | spin_unlock(&hb->lock); |
910 | put_futex_key(fshared, &key); | 924 | put_futex_key(&key); |
911 | out: | 925 | out: |
912 | return ret; | 926 | return ret; |
913 | } | 927 | } |
@@ -917,7 +931,7 @@ out: | |||
917 | * to this virtual address: | 931 | * to this virtual address: |
918 | */ | 932 | */ |
919 | static int | 933 | static int |
920 | futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | 934 | futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, |
921 | int nr_wake, int nr_wake2, int op) | 935 | int nr_wake, int nr_wake2, int op) |
922 | { | 936 | { |
923 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; | 937 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
@@ -927,10 +941,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | |||
927 | int ret, op_ret; | 941 | int ret, op_ret; |
928 | 942 | ||
929 | retry: | 943 | retry: |
930 | ret = get_futex_key(uaddr1, fshared, &key1); | 944 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); |
931 | if (unlikely(ret != 0)) | 945 | if (unlikely(ret != 0)) |
932 | goto out; | 946 | goto out; |
933 | ret = get_futex_key(uaddr2, fshared, &key2); | 947 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); |
934 | if (unlikely(ret != 0)) | 948 | if (unlikely(ret != 0)) |
935 | goto out_put_key1; | 949 | goto out_put_key1; |
936 | 950 | ||
@@ -962,11 +976,11 @@ retry_private: | |||
962 | if (ret) | 976 | if (ret) |
963 | goto out_put_keys; | 977 | goto out_put_keys; |
964 | 978 | ||
965 | if (!fshared) | 979 | if (!(flags & FLAGS_SHARED)) |
966 | goto retry_private; | 980 | goto retry_private; |
967 | 981 | ||
968 | put_futex_key(fshared, &key2); | 982 | put_futex_key(&key2); |
969 | put_futex_key(fshared, &key1); | 983 | put_futex_key(&key1); |
970 | goto retry; | 984 | goto retry; |
971 | } | 985 | } |
972 | 986 | ||
@@ -996,9 +1010,9 @@ retry_private: | |||
996 | 1010 | ||
997 | double_unlock_hb(hb1, hb2); | 1011 | double_unlock_hb(hb1, hb2); |
998 | out_put_keys: | 1012 | out_put_keys: |
999 | put_futex_key(fshared, &key2); | 1013 | put_futex_key(&key2); |
1000 | out_put_key1: | 1014 | out_put_key1: |
1001 | put_futex_key(fshared, &key1); | 1015 | put_futex_key(&key1); |
1002 | out: | 1016 | out: |
1003 | return ret; | 1017 | return ret; |
1004 | } | 1018 | } |
@@ -1133,13 +1147,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1133 | /** | 1147 | /** |
1134 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | 1148 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 |
1135 | * @uaddr1: source futex user address | 1149 | * @uaddr1: source futex user address |
1136 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 1150 | * @flags: futex flags (FLAGS_SHARED, etc.) |
1137 | * @uaddr2: target futex user address | 1151 | * @uaddr2: target futex user address |
1138 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) | 1152 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) |
1139 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) | 1153 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) |
1140 | * @cmpval: @uaddr1 expected value (or %NULL) | 1154 | * @cmpval: @uaddr1 expected value (or %NULL) |
1141 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | 1155 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a |
1142 | * pi futex (pi to pi requeue is not supported) | 1156 | * pi futex (pi to pi requeue is not supported) |
1143 | * | 1157 | * |
1144 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | 1158 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire |
1145 | * uaddr2 atomically on behalf of the top waiter. | 1159 | * uaddr2 atomically on behalf of the top waiter. |
@@ -1148,9 +1162,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1148 | * >=0 - on success, the number of tasks requeued or woken | 1162 | * >=0 - on success, the number of tasks requeued or woken |
1149 | * <0 - on error | 1163 | * <0 - on error |
1150 | */ | 1164 | */ |
1151 | static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | 1165 | static int futex_requeue(u32 __user *uaddr1, unsigned int flags, |
1152 | int nr_wake, int nr_requeue, u32 *cmpval, | 1166 | u32 __user *uaddr2, int nr_wake, int nr_requeue, |
1153 | int requeue_pi) | 1167 | u32 *cmpval, int requeue_pi) |
1154 | { | 1168 | { |
1155 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; | 1169 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
1156 | int drop_count = 0, task_count = 0, ret; | 1170 | int drop_count = 0, task_count = 0, ret; |
@@ -1191,10 +1205,10 @@ retry: | |||
1191 | pi_state = NULL; | 1205 | pi_state = NULL; |
1192 | } | 1206 | } |
1193 | 1207 | ||
1194 | ret = get_futex_key(uaddr1, fshared, &key1); | 1208 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); |
1195 | if (unlikely(ret != 0)) | 1209 | if (unlikely(ret != 0)) |
1196 | goto out; | 1210 | goto out; |
1197 | ret = get_futex_key(uaddr2, fshared, &key2); | 1211 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); |
1198 | if (unlikely(ret != 0)) | 1212 | if (unlikely(ret != 0)) |
1199 | goto out_put_key1; | 1213 | goto out_put_key1; |
1200 | 1214 | ||
@@ -1216,11 +1230,11 @@ retry_private: | |||
1216 | if (ret) | 1230 | if (ret) |
1217 | goto out_put_keys; | 1231 | goto out_put_keys; |
1218 | 1232 | ||
1219 | if (!fshared) | 1233 | if (!(flags & FLAGS_SHARED)) |
1220 | goto retry_private; | 1234 | goto retry_private; |
1221 | 1235 | ||
1222 | put_futex_key(fshared, &key2); | 1236 | put_futex_key(&key2); |
1223 | put_futex_key(fshared, &key1); | 1237 | put_futex_key(&key1); |
1224 | goto retry; | 1238 | goto retry; |
1225 | } | 1239 | } |
1226 | if (curval != *cmpval) { | 1240 | if (curval != *cmpval) { |
@@ -1260,8 +1274,8 @@ retry_private: | |||
1260 | break; | 1274 | break; |
1261 | case -EFAULT: | 1275 | case -EFAULT: |
1262 | double_unlock_hb(hb1, hb2); | 1276 | double_unlock_hb(hb1, hb2); |
1263 | put_futex_key(fshared, &key2); | 1277 | put_futex_key(&key2); |
1264 | put_futex_key(fshared, &key1); | 1278 | put_futex_key(&key1); |
1265 | ret = fault_in_user_writeable(uaddr2); | 1279 | ret = fault_in_user_writeable(uaddr2); |
1266 | if (!ret) | 1280 | if (!ret) |
1267 | goto retry; | 1281 | goto retry; |
@@ -1269,8 +1283,8 @@ retry_private: | |||
1269 | case -EAGAIN: | 1283 | case -EAGAIN: |
1270 | /* The owner was exiting, try again. */ | 1284 | /* The owner was exiting, try again. */ |
1271 | double_unlock_hb(hb1, hb2); | 1285 | double_unlock_hb(hb1, hb2); |
1272 | put_futex_key(fshared, &key2); | 1286 | put_futex_key(&key2); |
1273 | put_futex_key(fshared, &key1); | 1287 | put_futex_key(&key1); |
1274 | cond_resched(); | 1288 | cond_resched(); |
1275 | goto retry; | 1289 | goto retry; |
1276 | default: | 1290 | default: |
@@ -1352,9 +1366,9 @@ out_unlock: | |||
1352 | drop_futex_key_refs(&key1); | 1366 | drop_futex_key_refs(&key1); |
1353 | 1367 | ||
1354 | out_put_keys: | 1368 | out_put_keys: |
1355 | put_futex_key(fshared, &key2); | 1369 | put_futex_key(&key2); |
1356 | out_put_key1: | 1370 | out_put_key1: |
1357 | put_futex_key(fshared, &key1); | 1371 | put_futex_key(&key1); |
1358 | out: | 1372 | out: |
1359 | if (pi_state != NULL) | 1373 | if (pi_state != NULL) |
1360 | free_pi_state(pi_state); | 1374 | free_pi_state(pi_state); |
@@ -1494,7 +1508,7 @@ static void unqueue_me_pi(struct futex_q *q) | |||
1494 | * private futexes. | 1508 | * private futexes. |
1495 | */ | 1509 | */ |
1496 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | 1510 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
1497 | struct task_struct *newowner, int fshared) | 1511 | struct task_struct *newowner) |
1498 | { | 1512 | { |
1499 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | 1513 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; |
1500 | struct futex_pi_state *pi_state = q->pi_state; | 1514 | struct futex_pi_state *pi_state = q->pi_state; |
@@ -1587,20 +1601,11 @@ handle_fault: | |||
1587 | goto retry; | 1601 | goto retry; |
1588 | } | 1602 | } |
1589 | 1603 | ||
1590 | /* | ||
1591 | * In case we must use restart_block to restart a futex_wait, | ||
1592 | * we encode in the 'flags' shared capability | ||
1593 | */ | ||
1594 | #define FLAGS_SHARED 0x01 | ||
1595 | #define FLAGS_CLOCKRT 0x02 | ||
1596 | #define FLAGS_HAS_TIMEOUT 0x04 | ||
1597 | |||
1598 | static long futex_wait_restart(struct restart_block *restart); | 1604 | static long futex_wait_restart(struct restart_block *restart); |
1599 | 1605 | ||
1600 | /** | 1606 | /** |
1601 | * fixup_owner() - Post lock pi_state and corner case management | 1607 | * fixup_owner() - Post lock pi_state and corner case management |
1602 | * @uaddr: user address of the futex | 1608 | * @uaddr: user address of the futex |
1603 | * @fshared: whether the futex is shared (1) or not (0) | ||
1604 | * @q: futex_q (contains pi_state and access to the rt_mutex) | 1609 | * @q: futex_q (contains pi_state and access to the rt_mutex) |
1605 | * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) | 1610 | * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) |
1606 | * | 1611 | * |
@@ -1613,8 +1618,7 @@ static long futex_wait_restart(struct restart_block *restart); | |||
1613 | * 0 - success, lock not taken | 1618 | * 0 - success, lock not taken |
1614 | * <0 - on error (-EFAULT) | 1619 | * <0 - on error (-EFAULT) |
1615 | */ | 1620 | */ |
1616 | static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, | 1621 | static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) |
1617 | int locked) | ||
1618 | { | 1622 | { |
1619 | struct task_struct *owner; | 1623 | struct task_struct *owner; |
1620 | int ret = 0; | 1624 | int ret = 0; |
@@ -1625,7 +1629,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, | |||
1625 | * did a lock-steal - fix up the PI-state in that case: | 1629 | * did a lock-steal - fix up the PI-state in that case: |
1626 | */ | 1630 | */ |
1627 | if (q->pi_state->owner != current) | 1631 | if (q->pi_state->owner != current) |
1628 | ret = fixup_pi_state_owner(uaddr, q, current, fshared); | 1632 | ret = fixup_pi_state_owner(uaddr, q, current); |
1629 | goto out; | 1633 | goto out; |
1630 | } | 1634 | } |
1631 | 1635 | ||
@@ -1652,7 +1656,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, | |||
1652 | * lock. Fix the state up. | 1656 | * lock. Fix the state up. |
1653 | */ | 1657 | */ |
1654 | owner = rt_mutex_owner(&q->pi_state->pi_mutex); | 1658 | owner = rt_mutex_owner(&q->pi_state->pi_mutex); |
1655 | ret = fixup_pi_state_owner(uaddr, q, owner, fshared); | 1659 | ret = fixup_pi_state_owner(uaddr, q, owner); |
1656 | goto out; | 1660 | goto out; |
1657 | } | 1661 | } |
1658 | 1662 | ||
@@ -1715,7 +1719,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1715 | * futex_wait_setup() - Prepare to wait on a futex | 1719 | * futex_wait_setup() - Prepare to wait on a futex |
1716 | * @uaddr: the futex userspace address | 1720 | * @uaddr: the futex userspace address |
1717 | * @val: the expected value | 1721 | * @val: the expected value |
1718 | * @fshared: whether the futex is shared (1) or not (0) | 1722 | * @flags: futex flags (FLAGS_SHARED, etc.) |
1719 | * @q: the associated futex_q | 1723 | * @q: the associated futex_q |
1720 | * @hb: storage for hash_bucket pointer to be returned to caller | 1724 | * @hb: storage for hash_bucket pointer to be returned to caller |
1721 | * | 1725 | * |
@@ -1728,7 +1732,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1728 | * 0 - uaddr contains val and hb has been locked | 1732 | * 0 - uaddr contains val and hb has been locked |
1729 | * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked | 1733 | * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked |
1730 | */ | 1734 | */ |
1731 | static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, | 1735 | static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, |
1732 | struct futex_q *q, struct futex_hash_bucket **hb) | 1736 | struct futex_q *q, struct futex_hash_bucket **hb) |
1733 | { | 1737 | { |
1734 | u32 uval; | 1738 | u32 uval; |
@@ -1752,8 +1756,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, | |||
1752 | * rare, but normal. | 1756 | * rare, but normal. |
1753 | */ | 1757 | */ |
1754 | retry: | 1758 | retry: |
1755 | q->key = FUTEX_KEY_INIT; | 1759 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); |
1756 | ret = get_futex_key(uaddr, fshared, &q->key); | ||
1757 | if (unlikely(ret != 0)) | 1760 | if (unlikely(ret != 0)) |
1758 | return ret; | 1761 | return ret; |
1759 | 1762 | ||
@@ -1769,10 +1772,10 @@ retry_private: | |||
1769 | if (ret) | 1772 | if (ret) |
1770 | goto out; | 1773 | goto out; |
1771 | 1774 | ||
1772 | if (!fshared) | 1775 | if (!(flags & FLAGS_SHARED)) |
1773 | goto retry_private; | 1776 | goto retry_private; |
1774 | 1777 | ||
1775 | put_futex_key(fshared, &q->key); | 1778 | put_futex_key(&q->key); |
1776 | goto retry; | 1779 | goto retry; |
1777 | } | 1780 | } |
1778 | 1781 | ||
@@ -1783,32 +1786,29 @@ retry_private: | |||
1783 | 1786 | ||
1784 | out: | 1787 | out: |
1785 | if (ret) | 1788 | if (ret) |
1786 | put_futex_key(fshared, &q->key); | 1789 | put_futex_key(&q->key); |
1787 | return ret; | 1790 | return ret; |
1788 | } | 1791 | } |
1789 | 1792 | ||
1790 | static int futex_wait(u32 __user *uaddr, int fshared, | 1793 | static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, |
1791 | u32 val, ktime_t *abs_time, u32 bitset, int clockrt) | 1794 | ktime_t *abs_time, u32 bitset) |
1792 | { | 1795 | { |
1793 | struct hrtimer_sleeper timeout, *to = NULL; | 1796 | struct hrtimer_sleeper timeout, *to = NULL; |
1794 | struct restart_block *restart; | 1797 | struct restart_block *restart; |
1795 | struct futex_hash_bucket *hb; | 1798 | struct futex_hash_bucket *hb; |
1796 | struct futex_q q; | 1799 | struct futex_q q = futex_q_init; |
1797 | int ret; | 1800 | int ret; |
1798 | 1801 | ||
1799 | if (!bitset) | 1802 | if (!bitset) |
1800 | return -EINVAL; | 1803 | return -EINVAL; |
1801 | |||
1802 | q.pi_state = NULL; | ||
1803 | q.bitset = bitset; | 1804 | q.bitset = bitset; |
1804 | q.rt_waiter = NULL; | ||
1805 | q.requeue_pi_key = NULL; | ||
1806 | 1805 | ||
1807 | if (abs_time) { | 1806 | if (abs_time) { |
1808 | to = &timeout; | 1807 | to = &timeout; |
1809 | 1808 | ||
1810 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : | 1809 | hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? |
1811 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 1810 | CLOCK_REALTIME : CLOCK_MONOTONIC, |
1811 | HRTIMER_MODE_ABS); | ||
1812 | hrtimer_init_sleeper(to, current); | 1812 | hrtimer_init_sleeper(to, current); |
1813 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | 1813 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, |
1814 | current->timer_slack_ns); | 1814 | current->timer_slack_ns); |
@@ -1819,7 +1819,7 @@ retry: | |||
1819 | * Prepare to wait on uaddr. On success, holds hb lock and increments | 1819 | * Prepare to wait on uaddr. On success, holds hb lock and increments |
1820 | * q.key refs. | 1820 | * q.key refs. |
1821 | */ | 1821 | */ |
1822 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1822 | ret = futex_wait_setup(uaddr, val, flags, &q, &hb); |
1823 | if (ret) | 1823 | if (ret) |
1824 | goto out; | 1824 | goto out; |
1825 | 1825 | ||
@@ -1852,12 +1852,7 @@ retry: | |||
1852 | restart->futex.val = val; | 1852 | restart->futex.val = val; |
1853 | restart->futex.time = abs_time->tv64; | 1853 | restart->futex.time = abs_time->tv64; |
1854 | restart->futex.bitset = bitset; | 1854 | restart->futex.bitset = bitset; |
1855 | restart->futex.flags = FLAGS_HAS_TIMEOUT; | 1855 | restart->futex.flags = flags; |
1856 | |||
1857 | if (fshared) | ||
1858 | restart->futex.flags |= FLAGS_SHARED; | ||
1859 | if (clockrt) | ||
1860 | restart->futex.flags |= FLAGS_CLOCKRT; | ||
1861 | 1856 | ||
1862 | ret = -ERESTART_RESTARTBLOCK; | 1857 | ret = -ERESTART_RESTARTBLOCK; |
1863 | 1858 | ||
@@ -1873,7 +1868,6 @@ out: | |||
1873 | static long futex_wait_restart(struct restart_block *restart) | 1868 | static long futex_wait_restart(struct restart_block *restart) |
1874 | { | 1869 | { |
1875 | u32 __user *uaddr = restart->futex.uaddr; | 1870 | u32 __user *uaddr = restart->futex.uaddr; |
1876 | int fshared = 0; | ||
1877 | ktime_t t, *tp = NULL; | 1871 | ktime_t t, *tp = NULL; |
1878 | 1872 | ||
1879 | if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { | 1873 | if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { |
@@ -1881,11 +1875,9 @@ static long futex_wait_restart(struct restart_block *restart) | |||
1881 | tp = &t; | 1875 | tp = &t; |
1882 | } | 1876 | } |
1883 | restart->fn = do_no_restart_syscall; | 1877 | restart->fn = do_no_restart_syscall; |
1884 | if (restart->futex.flags & FLAGS_SHARED) | 1878 | |
1885 | fshared = 1; | 1879 | return (long)futex_wait(uaddr, restart->futex.flags, |
1886 | return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, | 1880 | restart->futex.val, tp, restart->futex.bitset); |
1887 | restart->futex.bitset, | ||
1888 | restart->futex.flags & FLAGS_CLOCKRT); | ||
1889 | } | 1881 | } |
1890 | 1882 | ||
1891 | 1883 | ||
@@ -1895,12 +1887,12 @@ static long futex_wait_restart(struct restart_block *restart) | |||
1895 | * if there are waiters then it will block, it does PI, etc. (Due to | 1887 | * if there are waiters then it will block, it does PI, etc. (Due to |
1896 | * races the kernel might see a 0 value of the futex too.) | 1888 | * races the kernel might see a 0 value of the futex too.) |
1897 | */ | 1889 | */ |
1898 | static int futex_lock_pi(u32 __user *uaddr, int fshared, | 1890 | static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, |
1899 | int detect, ktime_t *time, int trylock) | 1891 | ktime_t *time, int trylock) |
1900 | { | 1892 | { |
1901 | struct hrtimer_sleeper timeout, *to = NULL; | 1893 | struct hrtimer_sleeper timeout, *to = NULL; |
1902 | struct futex_hash_bucket *hb; | 1894 | struct futex_hash_bucket *hb; |
1903 | struct futex_q q; | 1895 | struct futex_q q = futex_q_init; |
1904 | int res, ret; | 1896 | int res, ret; |
1905 | 1897 | ||
1906 | if (refill_pi_state_cache()) | 1898 | if (refill_pi_state_cache()) |
@@ -1914,12 +1906,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1914 | hrtimer_set_expires(&to->timer, *time); | 1906 | hrtimer_set_expires(&to->timer, *time); |
1915 | } | 1907 | } |
1916 | 1908 | ||
1917 | q.pi_state = NULL; | ||
1918 | q.rt_waiter = NULL; | ||
1919 | q.requeue_pi_key = NULL; | ||
1920 | retry: | 1909 | retry: |
1921 | q.key = FUTEX_KEY_INIT; | 1910 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); |
1922 | ret = get_futex_key(uaddr, fshared, &q.key); | ||
1923 | if (unlikely(ret != 0)) | 1911 | if (unlikely(ret != 0)) |
1924 | goto out; | 1912 | goto out; |
1925 | 1913 | ||
@@ -1941,7 +1929,7 @@ retry_private: | |||
1941 | * exit to complete. | 1929 | * exit to complete. |
1942 | */ | 1930 | */ |
1943 | queue_unlock(&q, hb); | 1931 | queue_unlock(&q, hb); |
1944 | put_futex_key(fshared, &q.key); | 1932 | put_futex_key(&q.key); |
1945 | cond_resched(); | 1933 | cond_resched(); |
1946 | goto retry; | 1934 | goto retry; |
1947 | default: | 1935 | default: |
@@ -1971,7 +1959,7 @@ retry_private: | |||
1971 | * Fixup the pi_state owner and possibly acquire the lock if we | 1959 | * Fixup the pi_state owner and possibly acquire the lock if we |
1972 | * haven't already. | 1960 | * haven't already. |
1973 | */ | 1961 | */ |
1974 | res = fixup_owner(uaddr, fshared, &q, !ret); | 1962 | res = fixup_owner(uaddr, &q, !ret); |
1975 | /* | 1963 | /* |
1976 | * If fixup_owner() returned an error, propagate that. If it acquired | 1964 |
1977 | * the lock, clear our -ETIMEDOUT or -EINTR. | 1965 | * the lock, clear our -ETIMEDOUT or -EINTR. |
@@ -1995,7 +1983,7 @@ out_unlock_put_key: | |||
1995 | queue_unlock(&q, hb); | 1983 | queue_unlock(&q, hb); |
1996 | 1984 | ||
1997 | out_put_key: | 1985 | out_put_key: |
1998 | put_futex_key(fshared, &q.key); | 1986 | put_futex_key(&q.key); |
1999 | out: | 1987 | out: |
2000 | if (to) | 1988 | if (to) |
2001 | destroy_hrtimer_on_stack(&to->timer); | 1989 | destroy_hrtimer_on_stack(&to->timer); |
@@ -2008,10 +1996,10 @@ uaddr_faulted: | |||
2008 | if (ret) | 1996 | if (ret) |
2009 | goto out_put_key; | 1997 | goto out_put_key; |
2010 | 1998 | ||
2011 | if (!fshared) | 1999 | if (!(flags & FLAGS_SHARED)) |
2012 | goto retry_private; | 2000 | goto retry_private; |
2013 | 2001 | ||
2014 | put_futex_key(fshared, &q.key); | 2002 | put_futex_key(&q.key); |
2015 | goto retry; | 2003 | goto retry; |
2016 | } | 2004 | } |
2017 | 2005 | ||
@@ -2020,7 +2008,7 @@ uaddr_faulted: | |||
2020 | * This is the in-kernel slowpath: we look up the PI state (if any), | 2008 | * This is the in-kernel slowpath: we look up the PI state (if any), |
2021 | * and do the rt-mutex unlock. | 2009 | * and do the rt-mutex unlock. |
2022 | */ | 2010 | */ |
2023 | static int futex_unlock_pi(u32 __user *uaddr, int fshared) | 2011 | static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
2024 | { | 2012 | { |
2025 | struct futex_hash_bucket *hb; | 2013 | struct futex_hash_bucket *hb; |
2026 | struct futex_q *this, *next; | 2014 | struct futex_q *this, *next; |
@@ -2038,7 +2026,7 @@ retry: | |||
2038 | if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) | 2026 | if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) |
2039 | return -EPERM; | 2027 | return -EPERM; |
2040 | 2028 | ||
2041 | ret = get_futex_key(uaddr, fshared, &key); | 2029 | ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); |
2042 | if (unlikely(ret != 0)) | 2030 | if (unlikely(ret != 0)) |
2043 | goto out; | 2031 | goto out; |
2044 | 2032 | ||
@@ -2093,14 +2081,14 @@ retry: | |||
2093 | 2081 | ||
2094 | out_unlock: | 2082 | out_unlock: |
2095 | spin_unlock(&hb->lock); | 2083 | spin_unlock(&hb->lock); |
2096 | put_futex_key(fshared, &key); | 2084 | put_futex_key(&key); |
2097 | 2085 | ||
2098 | out: | 2086 | out: |
2099 | return ret; | 2087 | return ret; |
2100 | 2088 | ||
2101 | pi_faulted: | 2089 | pi_faulted: |
2102 | spin_unlock(&hb->lock); | 2090 | spin_unlock(&hb->lock); |
2103 | put_futex_key(fshared, &key); | 2091 | put_futex_key(&key); |
2104 | 2092 | ||
2105 | ret = fault_in_user_writeable(uaddr); | 2093 | ret = fault_in_user_writeable(uaddr); |
2106 | if (!ret) | 2094 | if (!ret) |
@@ -2160,7 +2148,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2160 | /** | 2148 | /** |
2161 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | 2149 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 |
2162 | * @uaddr: the futex we initially wait on (non-pi) | 2150 | * @uaddr: the futex we initially wait on (non-pi) |
2163 | * @fshared: whether the futexes are shared (1) or not (0). They must be | 2151 | * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be |
2164 | * the same type, no requeueing from private to shared, etc. | 2152 | * the same type, no requeueing from private to shared, etc. |
2165 | * @val: the expected value of uaddr | 2153 | * @val: the expected value of uaddr |
2166 | * @abs_time: absolute timeout | 2154 | * @abs_time: absolute timeout |
@@ -2198,16 +2186,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2198 | * 0 - On success | 2186 | * 0 - On success |
2199 | * <0 - On error | 2187 | * <0 - On error |
2200 | */ | 2188 | */ |
2201 | static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | 2189 | static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
2202 | u32 val, ktime_t *abs_time, u32 bitset, | 2190 | u32 val, ktime_t *abs_time, u32 bitset, |
2203 | int clockrt, u32 __user *uaddr2) | 2191 | u32 __user *uaddr2) |
2204 | { | 2192 | { |
2205 | struct hrtimer_sleeper timeout, *to = NULL; | 2193 | struct hrtimer_sleeper timeout, *to = NULL; |
2206 | struct rt_mutex_waiter rt_waiter; | 2194 | struct rt_mutex_waiter rt_waiter; |
2207 | struct rt_mutex *pi_mutex = NULL; | 2195 | struct rt_mutex *pi_mutex = NULL; |
2208 | struct futex_hash_bucket *hb; | 2196 | struct futex_hash_bucket *hb; |
2209 | union futex_key key2; | 2197 | union futex_key key2 = FUTEX_KEY_INIT; |
2210 | struct futex_q q; | 2198 | struct futex_q q = futex_q_init; |
2211 | int res, ret; | 2199 | int res, ret; |
2212 | 2200 | ||
2213 | if (!bitset) | 2201 | if (!bitset) |
@@ -2215,8 +2203,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2215 | 2203 | ||
2216 | if (abs_time) { | 2204 | if (abs_time) { |
2217 | to = &timeout; | 2205 | to = &timeout; |
2218 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : | 2206 | hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? |
2219 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 2207 | CLOCK_REALTIME : CLOCK_MONOTONIC, |
2208 | HRTIMER_MODE_ABS); | ||
2220 | hrtimer_init_sleeper(to, current); | 2209 | hrtimer_init_sleeper(to, current); |
2221 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | 2210 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, |
2222 | current->timer_slack_ns); | 2211 | current->timer_slack_ns); |
@@ -2229,12 +2218,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2229 | debug_rt_mutex_init_waiter(&rt_waiter); | 2218 | debug_rt_mutex_init_waiter(&rt_waiter); |
2230 | rt_waiter.task = NULL; | 2219 | rt_waiter.task = NULL; |
2231 | 2220 | ||
2232 | key2 = FUTEX_KEY_INIT; | 2221 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); |
2233 | ret = get_futex_key(uaddr2, fshared, &key2); | ||
2234 | if (unlikely(ret != 0)) | 2222 | if (unlikely(ret != 0)) |
2235 | goto out; | 2223 | goto out; |
2236 | 2224 | ||
2237 | q.pi_state = NULL; | ||
2238 | q.bitset = bitset; | 2225 | q.bitset = bitset; |
2239 | q.rt_waiter = &rt_waiter; | 2226 | q.rt_waiter = &rt_waiter; |
2240 | q.requeue_pi_key = &key2; | 2227 | q.requeue_pi_key = &key2; |
@@ -2243,7 +2230,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2243 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref | 2230 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref |
2244 | * count. | 2231 | * count. |
2245 | */ | 2232 | */ |
2246 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2233 | ret = futex_wait_setup(uaddr, val, flags, &q, &hb); |
2247 | if (ret) | 2234 | if (ret) |
2248 | goto out_key2; | 2235 | goto out_key2; |
2249 | 2236 | ||
@@ -2273,8 +2260,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2273 | */ | 2260 | */ |
2274 | if (q.pi_state && (q.pi_state->owner != current)) { | 2261 | if (q.pi_state && (q.pi_state->owner != current)) { |
2275 | spin_lock(q.lock_ptr); | 2262 | spin_lock(q.lock_ptr); |
2276 | ret = fixup_pi_state_owner(uaddr2, &q, current, | 2263 | ret = fixup_pi_state_owner(uaddr2, &q, current); |
2277 | fshared); | ||
2278 | spin_unlock(q.lock_ptr); | 2264 | spin_unlock(q.lock_ptr); |
2279 | } | 2265 | } |
2280 | } else { | 2266 | } else { |
@@ -2293,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2293 | * Fixup the pi_state owner and possibly acquire the lock if we | 2279 | * Fixup the pi_state owner and possibly acquire the lock if we |
2294 | * haven't already. | 2280 | * haven't already. |
2295 | */ | 2281 | */ |
2296 | res = fixup_owner(uaddr2, fshared, &q, !ret); | 2282 | res = fixup_owner(uaddr2, &q, !ret); |
2297 | /* | 2283 | /* |
2298 | * If fixup_owner() returned an error, propagate that. If it | 2284 |
2299 | * acquired the lock, clear -ETIMEDOUT or -EINTR. | 2285 | * acquired the lock, clear -ETIMEDOUT or -EINTR. |
@@ -2324,9 +2310,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2324 | } | 2310 | } |
2325 | 2311 | ||
2326 | out_put_keys: | 2312 | out_put_keys: |
2327 | put_futex_key(fshared, &q.key); | 2313 | put_futex_key(&q.key); |
2328 | out_key2: | 2314 | out_key2: |
2329 | put_futex_key(fshared, &key2); | 2315 | put_futex_key(&key2); |
2330 | 2316 | ||
2331 | out: | 2317 | out: |
2332 | if (to) { | 2318 | if (to) { |
@@ -2551,58 +2537,57 @@ void exit_robust_list(struct task_struct *curr) | |||
2551 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | 2537 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
2552 | u32 __user *uaddr2, u32 val2, u32 val3) | 2538 | u32 __user *uaddr2, u32 val2, u32 val3) |
2553 | { | 2539 | { |
2554 | int clockrt, ret = -ENOSYS; | 2540 | int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK; |
2555 | int cmd = op & FUTEX_CMD_MASK; | 2541 | unsigned int flags = 0; |
2556 | int fshared = 0; | ||
2557 | 2542 | ||
2558 | if (!(op & FUTEX_PRIVATE_FLAG)) | 2543 | if (!(op & FUTEX_PRIVATE_FLAG)) |
2559 | fshared = 1; | 2544 | flags |= FLAGS_SHARED; |
2560 | 2545 | ||
2561 | clockrt = op & FUTEX_CLOCK_REALTIME; | 2546 | if (op & FUTEX_CLOCK_REALTIME) { |
2562 | if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) | 2547 | flags |= FLAGS_CLOCKRT; |
2563 | return -ENOSYS; | 2548 | if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) |
2549 | return -ENOSYS; | ||
2550 | } | ||
2564 | 2551 | ||
2565 | switch (cmd) { | 2552 | switch (cmd) { |
2566 | case FUTEX_WAIT: | 2553 | case FUTEX_WAIT: |
2567 | val3 = FUTEX_BITSET_MATCH_ANY; | 2554 | val3 = FUTEX_BITSET_MATCH_ANY; |
2568 | case FUTEX_WAIT_BITSET: | 2555 | case FUTEX_WAIT_BITSET: |
2569 | ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); | 2556 | ret = futex_wait(uaddr, flags, val, timeout, val3); |
2570 | break; | 2557 | break; |
2571 | case FUTEX_WAKE: | 2558 | case FUTEX_WAKE: |
2572 | val3 = FUTEX_BITSET_MATCH_ANY; | 2559 | val3 = FUTEX_BITSET_MATCH_ANY; |
2573 | case FUTEX_WAKE_BITSET: | 2560 | case FUTEX_WAKE_BITSET: |
2574 | ret = futex_wake(uaddr, fshared, val, val3); | 2561 | ret = futex_wake(uaddr, flags, val, val3); |
2575 | break; | 2562 | break; |
2576 | case FUTEX_REQUEUE: | 2563 | case FUTEX_REQUEUE: |
2577 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); | 2564 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); |
2578 | break; | 2565 | break; |
2579 | case FUTEX_CMP_REQUEUE: | 2566 | case FUTEX_CMP_REQUEUE: |
2580 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | 2567 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); |
2581 | 0); | ||
2582 | break; | 2568 | break; |
2583 | case FUTEX_WAKE_OP: | 2569 | case FUTEX_WAKE_OP: |
2584 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); | 2570 | ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); |
2585 | break; | 2571 | break; |
2586 | case FUTEX_LOCK_PI: | 2572 | case FUTEX_LOCK_PI: |
2587 | if (futex_cmpxchg_enabled) | 2573 | if (futex_cmpxchg_enabled) |
2588 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); | 2574 | ret = futex_lock_pi(uaddr, flags, val, timeout, 0); |
2589 | break; | 2575 | break; |
2590 | case FUTEX_UNLOCK_PI: | 2576 | case FUTEX_UNLOCK_PI: |
2591 | if (futex_cmpxchg_enabled) | 2577 | if (futex_cmpxchg_enabled) |
2592 | ret = futex_unlock_pi(uaddr, fshared); | 2578 | ret = futex_unlock_pi(uaddr, flags); |
2593 | break; | 2579 | break; |
2594 | case FUTEX_TRYLOCK_PI: | 2580 | case FUTEX_TRYLOCK_PI: |
2595 | if (futex_cmpxchg_enabled) | 2581 | if (futex_cmpxchg_enabled) |
2596 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | 2582 | ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); |
2597 | break; | 2583 | break; |
2598 | case FUTEX_WAIT_REQUEUE_PI: | 2584 | case FUTEX_WAIT_REQUEUE_PI: |
2599 | val3 = FUTEX_BITSET_MATCH_ANY; | 2585 | val3 = FUTEX_BITSET_MATCH_ANY; |
2600 | ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, | 2586 | ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, |
2601 | clockrt, uaddr2); | 2587 | uaddr2); |
2602 | break; | 2588 | break; |
2603 | case FUTEX_CMP_REQUEUE_PI: | 2589 | case FUTEX_CMP_REQUEUE_PI: |
2604 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | 2590 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); |
2605 | 1); | ||
2606 | break; | 2591 | break; |
2607 | default: | 2592 | default: |
2608 | ret = -ENOSYS; | 2593 | ret = -ENOSYS; |
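
The do_futex() conversion above folds the separate "fshared" and "clockrt" ints into a single "flags" word that is threaded through every futex operation. A minimal sketch of the encoding; the FLAGS_SHARED/FLAGS_CLOCKRT values shown are assumptions consistent with the usage above (they are defined earlier in futex.c, outside this hunk), and the decode helper and its name are illustrative, not part of the patch:

#include <linux/futex.h>

#define FLAGS_SHARED	0x01	/* assumed values; actual defines live in futex.c */
#define FLAGS_CLOCKRT	0x02

static unsigned int futex_op_to_flags(int op)
{
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;	/* key may be shared across address spaces */
	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;	/* timeout measured on CLOCK_REALTIME */
	return flags;
}

Packing the booleans into one word is also what lets the fshared and clockrt parameters disappear from every call signature touched above.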
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 72206cf5c6cf..f2429fc3438c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | |||
516 | 516 | ||
517 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | 517 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { |
518 | struct hrtimer *timer; | 518 | struct hrtimer *timer; |
519 | struct timerqueue_node *next; | ||
519 | 520 | ||
520 | if (!base->first) | 521 | next = timerqueue_getnext(&base->active); |
522 | if (!next) | ||
521 | continue; | 523 | continue; |
522 | timer = rb_entry(base->first, struct hrtimer, node); | 524 | timer = container_of(next, struct hrtimer, node); |
525 | |||
523 | expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | 526 | expires = ktime_sub(hrtimer_get_expires(timer), base->offset); |
524 | /* | 527 | /* |
525 | * clock_was_set() has changed base->offset so the | 528 | * clock_was_set() has changed base->offset so the |
@@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); | |||
840 | static int enqueue_hrtimer(struct hrtimer *timer, | 843 | static int enqueue_hrtimer(struct hrtimer *timer, |
841 | struct hrtimer_clock_base *base) | 844 | struct hrtimer_clock_base *base) |
842 | { | 845 | { |
843 | struct rb_node **link = &base->active.rb_node; | ||
844 | struct rb_node *parent = NULL; | ||
845 | struct hrtimer *entry; | ||
846 | int leftmost = 1; | ||
847 | |||
848 | debug_activate(timer); | 846 | debug_activate(timer); |
849 | 847 | ||
850 | /* | 848 | timerqueue_add(&base->active, &timer->node); |
851 | * Find the right place in the rbtree: | ||
852 | */ | ||
853 | while (*link) { | ||
854 | parent = *link; | ||
855 | entry = rb_entry(parent, struct hrtimer, node); | ||
856 | /* | ||
857 | * We dont care about collisions. Nodes with | ||
858 | * the same expiry time stay together. | ||
859 | */ | ||
860 | if (hrtimer_get_expires_tv64(timer) < | ||
861 | hrtimer_get_expires_tv64(entry)) { | ||
862 | link = &(*link)->rb_left; | ||
863 | } else { | ||
864 | link = &(*link)->rb_right; | ||
865 | leftmost = 0; | ||
866 | } | ||
867 | } | ||
868 | |||
869 | /* | ||
870 | * Insert the timer to the rbtree and check whether it | ||
871 | * replaces the first pending timer | ||
872 | */ | ||
873 | if (leftmost) | ||
874 | base->first = &timer->node; | ||
875 | 849 | ||
876 | rb_link_node(&timer->node, parent, link); | ||
877 | rb_insert_color(&timer->node, &base->active); | ||
878 | /* | 850 | /* |
879 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the | 851 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the |
880 | * state of a possibly running callback. | 852 | * state of a possibly running callback. |
881 | */ | 853 | */ |
882 | timer->state |= HRTIMER_STATE_ENQUEUED; | 854 | timer->state |= HRTIMER_STATE_ENQUEUED; |
883 | 855 | ||
884 | return leftmost; | 856 | return (&timer->node == base->active.next); |
885 | } | 857 | } |
886 | 858 | ||
887 | /* | 859 | /* |
@@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer, | |||
901 | if (!(timer->state & HRTIMER_STATE_ENQUEUED)) | 873 | if (!(timer->state & HRTIMER_STATE_ENQUEUED)) |
902 | goto out; | 874 | goto out; |
903 | 875 | ||
904 | /* | 876 | if (&timer->node == timerqueue_getnext(&base->active)) { |
905 | * Remove the timer from the rbtree and replace the first | ||
906 | * entry pointer if necessary. | ||
907 | */ | ||
908 | if (base->first == &timer->node) { | ||
909 | base->first = rb_next(&timer->node); | ||
910 | #ifdef CONFIG_HIGH_RES_TIMERS | 877 | #ifdef CONFIG_HIGH_RES_TIMERS |
911 | /* Reprogram the clock event device, if enabled */ | 878 | /* Reprogram the clock event device, if enabled */ |
912 | if (reprogram && hrtimer_hres_active()) { | 879 | if (reprogram && hrtimer_hres_active()) { |
@@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer, | |||
919 | } | 886 | } |
920 | #endif | 887 | #endif |
921 | } | 888 | } |
922 | rb_erase(&timer->node, &base->active); | 889 | timerqueue_del(&base->active, &timer->node); |
923 | out: | 890 | out: |
924 | timer->state = newstate; | 891 | timer->state = newstate; |
925 | } | 892 | } |
@@ -1128,11 +1095,13 @@ ktime_t hrtimer_get_next_event(void) | |||
1128 | if (!hrtimer_hres_active()) { | 1095 | if (!hrtimer_hres_active()) { |
1129 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { | 1096 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { |
1130 | struct hrtimer *timer; | 1097 | struct hrtimer *timer; |
1098 | struct timerqueue_node *next; | ||
1131 | 1099 | ||
1132 | if (!base->first) | 1100 | next = timerqueue_getnext(&base->active); |
1101 | if (!next) | ||
1133 | continue; | 1102 | continue; |
1134 | 1103 | ||
1135 | timer = rb_entry(base->first, struct hrtimer, node); | 1104 | timer = container_of(next, struct hrtimer, node); |
1136 | delta.tv64 = hrtimer_get_expires_tv64(timer); | 1105 | delta.tv64 = hrtimer_get_expires_tv64(timer); |
1137 | delta = ktime_sub(delta, base->get_time()); | 1106 | delta = ktime_sub(delta, base->get_time()); |
1138 | if (delta.tv64 < mindelta.tv64) | 1107 | if (delta.tv64 < mindelta.tv64) |
@@ -1162,6 +1131,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
1162 | 1131 | ||
1163 | timer->base = &cpu_base->clock_base[clock_id]; | 1132 | timer->base = &cpu_base->clock_base[clock_id]; |
1164 | hrtimer_init_timer_hres(timer); | 1133 | hrtimer_init_timer_hres(timer); |
1134 | timerqueue_init(&timer->node); | ||
1165 | 1135 | ||
1166 | #ifdef CONFIG_TIMER_STATS | 1136 | #ifdef CONFIG_TIMER_STATS |
1167 | timer->start_site = NULL; | 1137 | timer->start_site = NULL; |
@@ -1278,14 +1248,14 @@ retry: | |||
1278 | 1248 | ||
1279 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1249 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1280 | ktime_t basenow; | 1250 | ktime_t basenow; |
1281 | struct rb_node *node; | 1251 | struct timerqueue_node *node; |
1282 | 1252 | ||
1283 | basenow = ktime_add(now, base->offset); | 1253 | basenow = ktime_add(now, base->offset); |
1284 | 1254 | ||
1285 | while ((node = base->first)) { | 1255 | while ((node = timerqueue_getnext(&base->active))) { |
1286 | struct hrtimer *timer; | 1256 | struct hrtimer *timer; |
1287 | 1257 | ||
1288 | timer = rb_entry(node, struct hrtimer, node); | 1258 | timer = container_of(node, struct hrtimer, node); |
1289 | 1259 | ||
1290 | /* | 1260 | /* |
1291 | * The immediate goal for using the softexpires is | 1261 | * The immediate goal for using the softexpires is |
@@ -1441,7 +1411,7 @@ void hrtimer_run_pending(void) | |||
1441 | */ | 1411 | */ |
1442 | void hrtimer_run_queues(void) | 1412 | void hrtimer_run_queues(void) |
1443 | { | 1413 | { |
1444 | struct rb_node *node; | 1414 | struct timerqueue_node *node; |
1445 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 1415 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
1446 | struct hrtimer_clock_base *base; | 1416 | struct hrtimer_clock_base *base; |
1447 | int index, gettime = 1; | 1417 | int index, gettime = 1; |
@@ -1451,8 +1421,7 @@ void hrtimer_run_queues(void) | |||
1451 | 1421 | ||
1452 | for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | 1422 | for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { |
1453 | base = &cpu_base->clock_base[index]; | 1423 | base = &cpu_base->clock_base[index]; |
1454 | 1424 | if (!timerqueue_getnext(&base->active)) | |
1455 | if (!base->first) | ||
1456 | continue; | 1425 | continue; |
1457 | 1426 | ||
1458 | if (gettime) { | 1427 | if (gettime) { |
@@ -1462,10 +1431,10 @@ void hrtimer_run_queues(void) | |||
1462 | 1431 | ||
1463 | raw_spin_lock(&cpu_base->lock); | 1432 | raw_spin_lock(&cpu_base->lock); |
1464 | 1433 | ||
1465 | while ((node = base->first)) { | 1434 | while ((node = timerqueue_getnext(&base->active))) { |
1466 | struct hrtimer *timer; | 1435 | struct hrtimer *timer; |
1467 | 1436 | ||
1468 | timer = rb_entry(node, struct hrtimer, node); | 1437 | timer = container_of(node, struct hrtimer, node); |
1469 | if (base->softirq_time.tv64 <= | 1438 | if (base->softirq_time.tv64 <= |
1470 | hrtimer_get_expires_tv64(timer)) | 1439 | hrtimer_get_expires_tv64(timer)) |
1471 | break; | 1440 | break; |
@@ -1630,8 +1599,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
1630 | 1599 | ||
1631 | raw_spin_lock_init(&cpu_base->lock); | 1600 | raw_spin_lock_init(&cpu_base->lock); |
1632 | 1601 | ||
1633 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | 1602 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1634 | cpu_base->clock_base[i].cpu_base = cpu_base; | 1603 | cpu_base->clock_base[i].cpu_base = cpu_base; |
1604 | timerqueue_init_head(&cpu_base->clock_base[i].active); | ||
1605 | } | ||
1635 | 1606 | ||
1636 | hrtimer_init_hres(cpu_base); | 1607 | hrtimer_init_hres(cpu_base); |
1637 | } | 1608 | } |
@@ -1642,10 +1613,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, | |||
1642 | struct hrtimer_clock_base *new_base) | 1613 | struct hrtimer_clock_base *new_base) |
1643 | { | 1614 | { |
1644 | struct hrtimer *timer; | 1615 | struct hrtimer *timer; |
1645 | struct rb_node *node; | 1616 | struct timerqueue_node *node; |
1646 | 1617 | ||
1647 | while ((node = rb_first(&old_base->active))) { | 1618 | while ((node = timerqueue_getnext(&old_base->active))) { |
1648 | timer = rb_entry(node, struct hrtimer, node); | 1619 | timer = container_of(node, struct hrtimer, node); |
1649 | BUG_ON(hrtimer_callback_running(timer)); | 1620 | BUG_ON(hrtimer_callback_running(timer)); |
1650 | debug_deactivate(timer); | 1621 | debug_deactivate(timer); |
1651 | 1622 | ||
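
All of the hrtimer hunks above replace open-coded rbtree traversal with the timerqueue layer introduced by this series in include/linux/timerqueue.h. A minimal sketch of the pattern; the function below is illustrative, and the signatures are as this series introduces them:

#include <linux/timerqueue.h>

static struct timerqueue_head queue;

static void timerqueue_example(struct timerqueue_node *node, ktime_t expires)
{
	struct timerqueue_node *next;

	timerqueue_init_head(&queue);
	node->expires = expires;		/* must be set before insertion */
	timerqueue_add(&queue, node);		/* rb-tree insert, cached leftmost updated */

	next = timerqueue_getnext(&queue);	/* O(1): no rb_first() walk needed */
	if (next)
		timerqueue_del(&queue, next);
}

The head caches the earliest-expiring node, which is why base->first and the manual "leftmost" bookkeeping in enqueue_hrtimer() could simply be deleted.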
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 5f92acc5f952..91a5fa25054e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } | |||
577 | */ | 577 | */ |
578 | static int irq_thread(void *data) | 578 | static int irq_thread(void *data) |
579 | { | 579 | { |
580 | struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; | 580 | static struct sched_param param = { |
581 | .sched_priority = MAX_USER_RT_PRIO/2, | ||
582 | }; | ||
581 | struct irqaction *action = data; | 583 | struct irqaction *action = data; |
582 | struct irq_desc *desc = irq_to_desc(action->irq); | 584 | struct irq_desc *desc = irq_to_desc(action->irq); |
583 | int wake, oneshot = desc->status & IRQ_ONESHOT; | 585 | int wake, oneshot = desc->status & IRQ_ONESHOT; |
diff --git a/kernel/kthread.c b/kernel/kthread.c index ca61bbdd44b2..5355cfd44a3f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
148 | wait_for_completion(&create.done); | 148 | wait_for_completion(&create.done); |
149 | 149 | ||
150 | if (!IS_ERR(create.result)) { | 150 | if (!IS_ERR(create.result)) { |
151 | struct sched_param param = { .sched_priority = 0 }; | 151 | static struct sched_param param = { .sched_priority = 0 }; |
152 | va_list args; | 152 | va_list args; |
153 | 153 | ||
154 | va_start(args, namefmt); | 154 | va_start(args, namefmt); |
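
The two hunks above (irq/manage.c and kthread.c) apply the same micro-optimization: sched_setscheduler_nocheck() only reads its sched_param argument, so the struct can be static instead of being rebuilt on the stack at every invocation. A sketch under that assumption; worker() is a hypothetical kthread body, not code from the patch:

static int worker(void *data)
{
	/* one read-only copy in .data, not a per-call stack copy */
	static struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,
	};

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
	return 0;
}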
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 59b76c8ce9d7..1969d2fc4b36 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -494,7 +494,6 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | |||
494 | namelen += 2; | 494 | namelen += 2; |
495 | 495 | ||
496 | for (i = 0; i < LOCKSTAT_POINTS; i++) { | 496 | for (i = 0; i < LOCKSTAT_POINTS; i++) { |
497 | char sym[KSYM_SYMBOL_LEN]; | ||
498 | char ip[32]; | 497 | char ip[32]; |
499 | 498 | ||
500 | if (class->contention_point[i] == 0) | 499 | if (class->contention_point[i] == 0) |
@@ -503,15 +502,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | |||
503 | if (!i) | 502 | if (!i) |
504 | seq_line(m, '-', 40-namelen, namelen); | 503 | seq_line(m, '-', 40-namelen, namelen); |
505 | 504 | ||
506 | sprint_symbol(sym, class->contention_point[i]); | ||
507 | snprintf(ip, sizeof(ip), "[<%p>]", | 505 | snprintf(ip, sizeof(ip), "[<%p>]", |
508 | (void *)class->contention_point[i]); | 506 | (void *)class->contention_point[i]); |
509 | seq_printf(m, "%40s %14lu %29s %s\n", name, | 507 | seq_printf(m, "%40s %14lu %29s %pS\n", |
510 | stats->contention_point[i], | 508 | name, stats->contention_point[i], |
511 | ip, sym); | 509 | ip, (void *)class->contention_point[i]); |
512 | } | 510 | } |
513 | for (i = 0; i < LOCKSTAT_POINTS; i++) { | 511 | for (i = 0; i < LOCKSTAT_POINTS; i++) { |
514 | char sym[KSYM_SYMBOL_LEN]; | ||
515 | char ip[32]; | 512 | char ip[32]; |
516 | 513 | ||
517 | if (class->contending_point[i] == 0) | 514 | if (class->contending_point[i] == 0) |
@@ -520,12 +517,11 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) | |||
520 | if (!i) | 517 | if (!i) |
521 | seq_line(m, '-', 40-namelen, namelen); | 518 | seq_line(m, '-', 40-namelen, namelen); |
522 | 519 | ||
523 | sprint_symbol(sym, class->contending_point[i]); | ||
524 | snprintf(ip, sizeof(ip), "[<%p>]", | 520 | snprintf(ip, sizeof(ip), "[<%p>]", |
525 | (void *)class->contending_point[i]); | 521 | (void *)class->contending_point[i]); |
526 | seq_printf(m, "%40s %14lu %29s %s\n", name, | 522 | seq_printf(m, "%40s %14lu %29s %pS\n", |
527 | stats->contending_point[i], | 523 | name, stats->contending_point[i], |
528 | ip, sym); | 524 | ip, (void *)class->contending_point[i]); |
529 | } | 525 | } |
530 | if (i) { | 526 | if (i) { |
531 | seq_puts(m, "\n"); | 527 | seq_puts(m, "\n"); |
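
The lockdep_proc.c change above drops the sprint_symbol() round-trip in favor of the %pS printk extension, which resolves a code address to symbol+offset inside the vsprintf core. A before/after sketch; show_point() is illustrative:

#include <linux/kallsyms.h>
#include <linux/seq_file.h>

static void show_point(struct seq_file *m, unsigned long addr)
{
	/* before: KSYM_SYMBOL_LEN bytes of stack at every call site */
	char sym[KSYM_SYMBOL_LEN];

	sprint_symbol(sym, addr);
	seq_printf(m, "%s\n", sym);

	/* after: no buffer; the printk core resolves the symbol */
	seq_printf(m, "%pS\n", (void *)addr);
}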
diff --git a/kernel/module.c b/kernel/module.c index d190664f25ff..34e00b708fad 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/percpu.h> | 56 | #include <linux/percpu.h> |
57 | #include <linux/kmemleak.h> | 57 | #include <linux/kmemleak.h> |
58 | #include <linux/jump_label.h> | 58 | #include <linux/jump_label.h> |
59 | #include <linux/pfn.h> | ||
59 | 60 | ||
60 | #define CREATE_TRACE_POINTS | 61 | #define CREATE_TRACE_POINTS |
61 | #include <trace/events/module.h> | 62 | #include <trace/events/module.h> |
@@ -70,6 +71,26 @@ | |||
70 | #define ARCH_SHF_SMALL 0 | 71 | #define ARCH_SHF_SMALL 0 |
71 | #endif | 72 | #endif |
72 | 73 | ||
74 | /* | ||
75 | * Modules' sections will be aligned on page boundaries | ||
76 | * to ensure complete separation of code and data, but | ||
77 | * only when CONFIG_DEBUG_SET_MODULE_RONX=y | ||
78 | */ | ||
79 | #ifdef CONFIG_DEBUG_SET_MODULE_RONX | ||
80 | # define debug_align(X) ALIGN(X, PAGE_SIZE) | ||
81 | #else | ||
82 | # define debug_align(X) (X) | ||
83 | #endif | ||
84 | |||
85 | /* | ||
86 | * Given BASE and SIZE this macro calculates the number of pages the | ||
87 | * memory region occupies | ||
88 | */ | ||
89 | #define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ | ||
90 | (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ | ||
91 | PFN_DOWN((unsigned long)BASE) + 1) \ | ||
92 | : (0UL)) | ||
93 | |||
73 | /* If this is set, the section belongs in the init part of the module */ | 94 | /* If this is set, the section belongs in the init part of the module */ |
74 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 95 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
75 | 96 | ||
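
A worked example of the page arithmetic above, assuming 4 KiB pages (PAGE_SHIFT == 12) and illustrative addresses:

/*
 * BASE = 0xffffffffa0001800, SIZE = 0x2000 (8 KiB, last byte at ...37ff)
 * PFN_DOWN(BASE)            = 0x...a0001
 * PFN_DOWN(BASE + SIZE - 1) = 0x...a0003
 * MOD_NUMBER_OF_PAGES       = 0x...a0003 - 0x...a0001 + 1 = 3 pages
 */

The region is only two pages long but straddles three page frames, which is exactly what the set_memory_*() calls need to know. debug_align() in turn rounds section boundaries up to PAGE_SIZE so that text, RO data, and RW data never share a page.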
@@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) | |||
1542 | return 0; | 1563 | return 0; |
1543 | } | 1564 | } |
1544 | 1565 | ||
1566 | #ifdef CONFIG_DEBUG_SET_MODULE_RONX | ||
1567 | /* | ||
1568 | * LKM RO/NX protection: protect module's text/ro-data | ||
1569 | * from modification and any data from execution. | ||
1570 | */ | ||
1571 | void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) | ||
1572 | { | ||
1573 | unsigned long begin_pfn = PFN_DOWN((unsigned long)start); | ||
1574 | unsigned long end_pfn = PFN_DOWN((unsigned long)end); | ||
1575 | |||
1576 | if (end_pfn > begin_pfn) | ||
1577 | set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); | ||
1578 | } | ||
1579 | |||
1580 | static void set_section_ro_nx(void *base, | ||
1581 | unsigned long text_size, | ||
1582 | unsigned long ro_size, | ||
1583 | unsigned long total_size) | ||
1584 | { | ||
1585 | /* begin and end PFNs of the current subsection */ | ||
1586 | unsigned long begin_pfn; | ||
1587 | unsigned long end_pfn; | ||
1588 | |||
1589 | /* | ||
1590 | * Set RO for module text and RO-data: | ||
1591 | * - Always protect first page. | ||
1592 | * - Do not protect last partial page. | ||
1593 | */ | ||
1594 | if (ro_size > 0) | ||
1595 | set_page_attributes(base, base + ro_size, set_memory_ro); | ||
1596 | |||
1597 | /* | ||
1598 | * Set NX permissions for module data: | ||
1599 | * - Do not protect first partial page. | ||
1600 | * - Always protect last page. | ||
1601 | */ | ||
1602 | if (total_size > text_size) { | ||
1603 | begin_pfn = PFN_UP((unsigned long)base + text_size); | ||
1604 | end_pfn = PFN_UP((unsigned long)base + total_size); | ||
1605 | if (end_pfn > begin_pfn) | ||
1606 | set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); | ||
1607 | } | ||
1608 | } | ||
1609 | |||
1610 | /* Setting memory back to RW+NX before releasing it */ | ||
1611 | void unset_section_ro_nx(struct module *mod, void *module_region) | ||
1612 | { | ||
1613 | unsigned long total_pages; | ||
1614 | |||
1615 | if (mod->module_core == module_region) { | ||
1616 | /* Set core as NX+RW */ | ||
1617 | total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); | ||
1618 | set_memory_nx((unsigned long)mod->module_core, total_pages); | ||
1619 | set_memory_rw((unsigned long)mod->module_core, total_pages); | ||
1620 | |||
1621 | } else if (mod->module_init == module_region) { | ||
1622 | /* Set init as NX+RW */ | ||
1623 | total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); | ||
1624 | set_memory_nx((unsigned long)mod->module_init, total_pages); | ||
1625 | set_memory_rw((unsigned long)mod->module_init, total_pages); | ||
1626 | } | ||
1627 | } | ||
1628 | |||
1629 | /* Iterate through all modules and set each module's text as RW */ | ||
1630 | void set_all_modules_text_rw() | ||
1631 | { | ||
1632 | struct module *mod; | ||
1633 | |||
1634 | mutex_lock(&module_mutex); | ||
1635 | list_for_each_entry_rcu(mod, &modules, list) { | ||
1636 | if ((mod->module_core) && (mod->core_text_size)) { | ||
1637 | set_page_attributes(mod->module_core, | ||
1638 | mod->module_core + mod->core_text_size, | ||
1639 | set_memory_rw); | ||
1640 | } | ||
1641 | if ((mod->module_init) && (mod->init_text_size)) { | ||
1642 | set_page_attributes(mod->module_init, | ||
1643 | mod->module_init + mod->init_text_size, | ||
1644 | set_memory_rw); | ||
1645 | } | ||
1646 | } | ||
1647 | mutex_unlock(&module_mutex); | ||
1648 | } | ||
1649 | |||
1650 | /* Iterate through all modules and set each module's text as RO */ | ||
1651 | void set_all_modules_text_ro() | ||
1652 | { | ||
1653 | struct module *mod; | ||
1654 | |||
1655 | mutex_lock(&module_mutex); | ||
1656 | list_for_each_entry_rcu(mod, &modules, list) { | ||
1657 | if ((mod->module_core) && (mod->core_text_size)) { | ||
1658 | set_page_attributes(mod->module_core, | ||
1659 | mod->module_core + mod->core_text_size, | ||
1660 | set_memory_ro); | ||
1661 | } | ||
1662 | if ((mod->module_init) && (mod->init_text_size)) { | ||
1663 | set_page_attributes(mod->module_init, | ||
1664 | mod->module_init + mod->init_text_size, | ||
1665 | set_memory_ro); | ||
1666 | } | ||
1667 | } | ||
1668 | mutex_unlock(&module_mutex); | ||
1669 | } | ||
1670 | #else | ||
1671 | static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } | ||
1672 | static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } | ||
1673 | #endif | ||
1674 | |||
1545 | /* Free a module, remove from lists, etc. */ | 1675 | /* Free a module, remove from lists, etc. */ |
1546 | static void free_module(struct module *mod) | 1676 | static void free_module(struct module *mod) |
1547 | { | 1677 | { |
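
set_section_ro_nx() above assumes the layout that the debug_align() changes further down guarantee. Sketched as a page map; the diagram is illustrative, not taken from the patch:

/*
 * base           base+text_size    base+ro_size       base+total_size
 * |----- text -----|----- ro data -----|------ rw data ------|
 * [ RO, X          ][ RO, NX           ][ RW, NX             ]
 *
 * set_memory_ro() covers [base, base+ro_size) and set_memory_nx()
 * covers [base+text_size, base+total_size); with page-aligned
 * sizes the two boundaries never split a page.
 */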
@@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) | |||
1566 | destroy_params(mod->kp, mod->num_kp); | 1696 | destroy_params(mod->kp, mod->num_kp); |
1567 | 1697 | ||
1568 | /* This may be NULL, but that's OK */ | 1698 | /* This may be NULL, but that's OK */ |
1699 | unset_section_ro_nx(mod, mod->module_init); | ||
1569 | module_free(mod, mod->module_init); | 1700 | module_free(mod, mod->module_init); |
1570 | kfree(mod->args); | 1701 | kfree(mod->args); |
1571 | percpu_modfree(mod); | 1702 | percpu_modfree(mod); |
@@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) | |||
1574 | lockdep_free_key_range(mod->module_core, mod->core_size); | 1705 | lockdep_free_key_range(mod->module_core, mod->core_size); |
1575 | 1706 | ||
1576 | /* Finally, free the core (containing the module structure) */ | 1707 | /* Finally, free the core (containing the module structure) */ |
1708 | unset_section_ro_nx(mod, mod->module_core); | ||
1577 | module_free(mod, mod->module_core); | 1709 | module_free(mod, mod->module_core); |
1578 | 1710 | ||
1579 | #ifdef CONFIG_MPU | 1711 | #ifdef CONFIG_MPU |
@@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
1777 | s->sh_entsize = get_offset(mod, &mod->core_size, s, i); | 1909 | s->sh_entsize = get_offset(mod, &mod->core_size, s, i); |
1778 | DEBUGP("\t%s\n", name); | 1910 | DEBUGP("\t%s\n", name); |
1779 | } | 1911 | } |
1780 | if (m == 0) | 1912 | switch (m) { |
1913 | case 0: /* executable */ | ||
1914 | mod->core_size = debug_align(mod->core_size); | ||
1781 | mod->core_text_size = mod->core_size; | 1915 | mod->core_text_size = mod->core_size; |
1916 | break; | ||
1917 | case 1: /* RO: text and ro-data */ | ||
1918 | mod->core_size = debug_align(mod->core_size); | ||
1919 | mod->core_ro_size = mod->core_size; | ||
1920 | break; | ||
1921 | case 3: /* whole core */ | ||
1922 | mod->core_size = debug_align(mod->core_size); | ||
1923 | break; | ||
1924 | } | ||
1782 | } | 1925 | } |
1783 | 1926 | ||
1784 | DEBUGP("Init section allocation order:\n"); | 1927 | DEBUGP("Init section allocation order:\n"); |
@@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
1796 | | INIT_OFFSET_MASK); | 1939 | | INIT_OFFSET_MASK); |
1797 | DEBUGP("\t%s\n", sname); | 1940 | DEBUGP("\t%s\n", sname); |
1798 | } | 1941 | } |
1799 | if (m == 0) | 1942 | switch (m) { |
1943 | case 0: /* executable */ | ||
1944 | mod->init_size = debug_align(mod->init_size); | ||
1800 | mod->init_text_size = mod->init_size; | 1945 | mod->init_text_size = mod->init_size; |
1946 | break; | ||
1947 | case 1: /* RO: text and ro-data */ | ||
1948 | mod->init_size = debug_align(mod->init_size); | ||
1949 | mod->init_ro_size = mod->init_size; | ||
1950 | break; | ||
1951 | case 3: /* whole init */ | ||
1952 | mod->init_size = debug_align(mod->init_size); | ||
1953 | break; | ||
1954 | } | ||
1801 | } | 1955 | } |
1802 | } | 1956 | } |
1803 | 1957 | ||
@@ -2722,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
2722 | blocking_notifier_call_chain(&module_notify_list, | 2876 | blocking_notifier_call_chain(&module_notify_list, |
2723 | MODULE_STATE_COMING, mod); | 2877 | MODULE_STATE_COMING, mod); |
2724 | 2878 | ||
2879 | /* Set RO and NX regions for core */ | ||
2880 | set_section_ro_nx(mod->module_core, | ||
2881 | mod->core_text_size, | ||
2882 | mod->core_ro_size, | ||
2883 | mod->core_size); | ||
2884 | |||
2885 | /* Set RO and NX regions for init */ | ||
2886 | set_section_ro_nx(mod->module_init, | ||
2887 | mod->init_text_size, | ||
2888 | mod->init_ro_size, | ||
2889 | mod->init_size); | ||
2890 | |||
2725 | do_mod_ctors(mod); | 2891 | do_mod_ctors(mod); |
2726 | /* Start the module */ | 2892 | /* Start the module */ |
2727 | if (mod->init != NULL) | 2893 | if (mod->init != NULL) |
@@ -2765,6 +2931,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
2765 | mod->symtab = mod->core_symtab; | 2931 | mod->symtab = mod->core_symtab; |
2766 | mod->strtab = mod->core_strtab; | 2932 | mod->strtab = mod->core_strtab; |
2767 | #endif | 2933 | #endif |
2934 | unset_section_ro_nx(mod, mod->module_init); | ||
2768 | module_free(mod, mod->module_init); | 2935 | module_free(mod, mod->module_init); |
2769 | mod->module_init = NULL; | 2936 | mod->module_init = NULL; |
2770 | mod->init_size = 0; | 2937 | mod->init_size = 0; |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 200407c1502f..a5889fb28ecf 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
199 | * memory barriers as we'll eventually observe the right | 199 | * memory barriers as we'll eventually observe the right |
200 | * values at the cost of a few extra spins. | 200 | * values at the cost of a few extra spins. |
201 | */ | 201 | */ |
202 | cpu_relax(); | 202 | arch_mutex_cpu_relax(); |
203 | } | 203 | } |
204 | #endif | 204 | #endif |
205 | spin_lock_mutex(&lock->wait_lock, flags); | 205 | spin_lock_mutex(&lock->wait_lock, flags); |
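
arch_mutex_cpu_relax() above lets an architecture substitute a cheaper delay for cpu_relax() in the mutex spin-on-owner loop (s390 being the motivating case, where cpu_relax() yields the virtual CPU). The companion patch in this series supplies a generic fallback along these lines:

#ifndef arch_mutex_cpu_relax
# define arch_mutex_cpu_relax()	cpu_relax()
#endif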
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9ca4973f736d..93bd2eb2bc53 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer); | |||
145 | 145 | ||
146 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); | 146 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); |
147 | 147 | ||
148 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); | 148 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); |
149 | |||
150 | #define lock_timer(tid, flags) \ | ||
151 | ({ struct k_itimer *__timr; \ | ||
152 | __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ | ||
153 | __timr; \ | ||
154 | }) | ||
149 | 155 | ||
150 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) | 156 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) |
151 | { | 157 | { |
@@ -619,7 +625,7 @@ out: | |||
619 | * the find to the timer lock. To avoid a deadlock, the timer id MUST | 625 | * the find to the timer lock. To avoid a deadlock, the timer id MUST |
620 | * be released without holding the timer lock. | 626 | * be released without holding the timer lock. |
621 | */ | 627 | */ |
622 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) | 628 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) |
623 | { | 629 | { |
624 | struct k_itimer *timr; | 630 | struct k_itimer *timr; |
625 | /* | 631 | /* |
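
The lock_timer() macro above is pure annotation plumbing: __cond_lock() tells sparse that the lock is held exactly when the returned pointer is non-NULL, something a plain function prototype cannot express. For reference, __cond_lock() in <linux/compiler.h> is defined along these lines:

#ifdef __CHECKER__
# define __cond_lock(x, c)	((c) ? ({ __acquire(x); 1; }) : 0)
#else
# define __cond_lock(x, c)	(c)
#endif

Outside sparse runs the macro collapses to its second argument, so the generated code is unchanged.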
diff --git a/kernel/printk.c b/kernel/printk.c index a23315dc4498..ab3ffc5b3b64 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending); | |||
1074 | 1074 | ||
1075 | void printk_tick(void) | 1075 | void printk_tick(void) |
1076 | { | 1076 | { |
1077 | if (__get_cpu_var(printk_pending)) { | 1077 | if (__this_cpu_read(printk_pending)) { |
1078 | __get_cpu_var(printk_pending) = 0; | 1078 | __this_cpu_write(printk_pending, 0); |
1079 | wake_up_interruptible(&log_wait); | 1079 | wake_up_interruptible(&log_wait); |
1080 | } | 1080 | } |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | int printk_needs_cpu(int cpu) | 1083 | int printk_needs_cpu(int cpu) |
1084 | { | 1084 | { |
1085 | if (unlikely(cpu_is_offline(cpu))) | 1085 | if (cpu_is_offline(cpu)) |
1086 | printk_tick(); | 1086 | printk_tick(); |
1087 | return per_cpu(printk_pending, cpu); | 1087 | return __this_cpu_read(printk_pending); |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | void wake_up_klogd(void) | 1090 | void wake_up_klogd(void) |
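
The printk_tick() conversion above swaps address-generating per-CPU accessors for the __this_cpu_*() operations, which on x86 compile to a single %gs-relative instruction instead of "compute this CPU's offset, then dereference". A sketch of the idiom; the helper names and the pending_example variable are illustrative:

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, pending_example);

static void mark_pending(void)
{
	__this_cpu_write(pending_example, 1);	/* single per-cpu store */
}

static int test_and_clear_pending(void)
{
	if (!__this_cpu_read(pending_example))
		return 0;
	__this_cpu_write(pending_example, 0);
	return 1;
}

Note the behavioral detail folded into printk_needs_cpu(): it now reports the current CPU's flag via __this_cpu_read() rather than per_cpu(..., cpu), which its callers appear to guarantee by only asking about the local CPU.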
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index d806735342ac..034493724749 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -36,31 +36,16 @@ | |||
36 | #include <linux/time.h> | 36 | #include <linux/time.h> |
37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
38 | 38 | ||
39 | /* Global control variables for rcupdate callback mechanism. */ | 39 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ |
40 | struct rcu_ctrlblk { | 40 | static struct task_struct *rcu_kthread_task; |
41 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | 41 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); |
42 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | 42 | static unsigned long have_rcu_kthread_work; |
43 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | 43 | static void invoke_rcu_kthread(void); |
44 | }; | ||
45 | |||
46 | /* Definition for rcupdate control block. */ | ||
47 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | ||
48 | .donetail = &rcu_sched_ctrlblk.rcucblist, | ||
49 | .curtail = &rcu_sched_ctrlblk.rcucblist, | ||
50 | }; | ||
51 | |||
52 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | ||
53 | .donetail = &rcu_bh_ctrlblk.rcucblist, | ||
54 | .curtail = &rcu_bh_ctrlblk.rcucblist, | ||
55 | }; | ||
56 | |||
57 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
58 | int rcu_scheduler_active __read_mostly; | ||
59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
61 | 44 | ||
62 | /* Forward declarations for rcutiny_plugin.h. */ | 45 | /* Forward declarations for rcutiny_plugin.h. */ |
63 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | 46 | struct rcu_ctrlblk; |
47 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||
48 | static int rcu_kthread(void *arg); | ||
64 | static void __call_rcu(struct rcu_head *head, | 49 | static void __call_rcu(struct rcu_head *head, |
65 | void (*func)(struct rcu_head *rcu), | 50 | void (*func)(struct rcu_head *rcu), |
66 | struct rcu_ctrlblk *rcp); | 51 | struct rcu_ctrlblk *rcp); |
@@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu) | |||
123 | { | 108 | { |
124 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | 109 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
125 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | 110 | rcu_qsctr_help(&rcu_bh_ctrlblk)) |
126 | raise_softirq(RCU_SOFTIRQ); | 111 | invoke_rcu_kthread(); |
127 | } | 112 | } |
128 | 113 | ||
129 | /* | 114 | /* |
@@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu) | |||
132 | void rcu_bh_qs(int cpu) | 117 | void rcu_bh_qs(int cpu) |
133 | { | 118 | { |
134 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | 119 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) |
135 | raise_softirq(RCU_SOFTIRQ); | 120 | invoke_rcu_kthread(); |
136 | } | 121 | } |
137 | 122 | ||
138 | /* | 123 | /* |
@@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user) | |||
152 | } | 137 | } |
153 | 138 | ||
154 | /* | 139 | /* |
155 | * Helper function for rcu_process_callbacks() that operates on the | 140 | * Invoke the RCU callbacks on the specified rcu_ctrlblk structure |
156 | * specified rcu_ctrlblk structure. | 141 | * whose grace period has elapsed. |
157 | */ | 142 | */ |
158 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | 143 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) |
159 | { | 144 | { |
160 | struct rcu_head *next, *list; | 145 | struct rcu_head *next, *list; |
161 | unsigned long flags; | 146 | unsigned long flags; |
147 | RCU_TRACE(int cb_count = 0); | ||
162 | 148 | ||
163 | /* If no RCU callbacks ready to invoke, just return. */ | 149 | /* If no RCU callbacks ready to invoke, just return. */ |
164 | if (&rcp->rcucblist == rcp->donetail) | 150 | if (&rcp->rcucblist == rcp->donetail) |
@@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
180 | next = list->next; | 166 | next = list->next; |
181 | prefetch(next); | 167 | prefetch(next); |
182 | debug_rcu_head_unqueue(list); | 168 | debug_rcu_head_unqueue(list); |
169 | local_bh_disable(); | ||
183 | list->func(list); | 170 | list->func(list); |
171 | local_bh_enable(); | ||
184 | list = next; | 172 | list = next; |
173 | RCU_TRACE(cb_count++); | ||
185 | } | 174 | } |
175 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | ||
186 | } | 176 | } |
187 | 177 | ||
188 | /* | 178 | /* |
189 | * Invoke any callbacks whose grace period has completed. | 179 | * This kthread invokes RCU callbacks whose grace periods have |
180 | * elapsed. It is awakened as needed, and takes the place of the | ||
181 | * RCU_SOFTIRQ that was used previously for this purpose. | ||
182 | * This is a kthread, but it is never stopped, at least not until | ||
183 | * the system goes down. | ||
190 | */ | 184 | */ |
191 | static void rcu_process_callbacks(struct softirq_action *unused) | 185 | static int rcu_kthread(void *arg) |
192 | { | 186 | { |
193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); | 187 | unsigned long work; |
194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 188 | unsigned long morework; |
195 | rcu_preempt_process_callbacks(); | 189 | unsigned long flags; |
190 | |||
191 | for (;;) { | ||
192 | wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); | ||
193 | morework = rcu_boost(); | ||
194 | local_irq_save(flags); | ||
195 | work = have_rcu_kthread_work; | ||
196 | have_rcu_kthread_work = morework; | ||
197 | local_irq_restore(flags); | ||
198 | if (work) { | ||
199 | rcu_process_callbacks(&rcu_sched_ctrlblk); | ||
200 | rcu_process_callbacks(&rcu_bh_ctrlblk); | ||
201 | rcu_preempt_process_callbacks(); | ||
202 | } | ||
203 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
204 | } | ||
205 | |||
206 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
211 | * or to boost readers. | ||
212 | */ | ||
213 | static void invoke_rcu_kthread(void) | ||
214 | { | ||
215 | unsigned long flags; | ||
216 | |||
217 | local_irq_save(flags); | ||
218 | have_rcu_kthread_work = 1; | ||
219 | wake_up(&rcu_kthread_wq); | ||
220 | local_irq_restore(flags); | ||
196 | } | 221 | } |
197 | 222 | ||
198 | /* | 223 | /* |
@@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head, | |||
230 | local_irq_save(flags); | 255 | local_irq_save(flags); |
231 | *rcp->curtail = head; | 256 | *rcp->curtail = head; |
232 | rcp->curtail = &head->next; | 257 | rcp->curtail = &head->next; |
258 | RCU_TRACE(rcp->qlen++); | ||
233 | local_irq_restore(flags); | 259 | local_irq_restore(flags); |
234 | } | 260 | } |
235 | 261 | ||
@@ -282,7 +308,16 @@ void rcu_barrier_sched(void) | |||
282 | } | 308 | } |
283 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 309 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
284 | 310 | ||
285 | void __init rcu_init(void) | 311 | /* |
312 | * Spawn the kthread that invokes RCU callbacks. | ||
313 | */ | ||
314 | static int __init rcu_spawn_kthreads(void) | ||
286 | { | 315 | { |
287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 316 | struct sched_param sp; |
317 | |||
318 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
319 | sp.sched_priority = RCU_BOOST_PRIO; | ||
320 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
321 | return 0; | ||
288 | } | 322 | } |
323 | early_initcall(rcu_spawn_kthreads); | ||
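
The rcutiny rework above moves callback invocation from RCU_SOFTIRQ into a dedicated kthread. Distilled to its wake/consume handoff; identifiers are shortened and the bodies are a sketch of the pattern, not the patch's exact code:

static DECLARE_WAIT_QUEUE_HEAD(work_wq);
static unsigned long have_work;

static void poke(void)			/* cf. invoke_rcu_kthread() */
{
	unsigned long flags;

	local_irq_save(flags);		/* callable from irq context */
	have_work = 1;
	wake_up(&work_wq);
	local_irq_restore(flags);
}

static int worker(void *arg)		/* cf. rcu_kthread() */
{
	for (;;) {
		wait_event(work_wq, have_work != 0);
		have_work = 0;		/* consume the flag before processing */
		/* ... invoke ready callbacks, bh disabled around each ... */
		schedule_timeout_interruptible(1);	/* leave CPU for others */
	}
	return 0;			/* not reached */
}

Running the worker as SCHED_FIFO (see rcu_spawn_kthreads() above) is what makes the RCU_BOOST priority scheme in rcutiny_plugin.h workable.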
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 6ceca4f745ff..015abaea962a 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -22,6 +22,40 @@ | |||
22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/kthread.h> | ||
26 | #include <linux/debugfs.h> | ||
27 | #include <linux/seq_file.h> | ||
28 | |||
29 | #ifdef CONFIG_RCU_TRACE | ||
30 | #define RCU_TRACE(stmt) stmt | ||
31 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
32 | #define RCU_TRACE(stmt) | ||
33 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
34 | |||
35 | /* Global control variables for rcupdate callback mechanism. */ | ||
36 | struct rcu_ctrlblk { | ||
37 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | ||
38 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | ||
39 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | ||
40 | RCU_TRACE(long qlen); /* Number of pending CBs. */ | ||
41 | }; | ||
42 | |||
43 | /* Definition for rcupdate control block. */ | ||
44 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | ||
45 | .donetail = &rcu_sched_ctrlblk.rcucblist, | ||
46 | .curtail = &rcu_sched_ctrlblk.rcucblist, | ||
47 | }; | ||
48 | |||
49 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | ||
50 | .donetail = &rcu_bh_ctrlblk.rcucblist, | ||
51 | .curtail = &rcu_bh_ctrlblk.rcucblist, | ||
52 | }; | ||
53 | |||
54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
55 | int rcu_scheduler_active __read_mostly; | ||
56 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
57 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
58 | |||
25 | #ifdef CONFIG_TINY_PREEMPT_RCU | 59 | #ifdef CONFIG_TINY_PREEMPT_RCU |
26 | 60 | ||
27 | #include <linux/delay.h> | 61 | #include <linux/delay.h> |
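
The RCU_TRACE() wrapper above lets both the statistics fields and the statements that update them vanish when CONFIG_RCU_TRACE=n, e.g.:

RCU_TRACE(rcp->qlen++);		/* CONFIG_RCU_TRACE=y: rcp->qlen++;  =n: nothing */

Because the qlen field itself is declared through the same macro, the =n build carries no dead storage either.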
@@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk { | |||
46 | struct list_head *gp_tasks; | 80 | struct list_head *gp_tasks; |
47 | /* Pointer to the first task blocking the */ | 81 | /* Pointer to the first task blocking the */ |
48 | /* current grace period, or NULL if there */ | 82 | /* current grace period, or NULL if there */ |
49 | /* is not such task. */ | 83 | /* is no such task. */ |
50 | struct list_head *exp_tasks; | 84 | struct list_head *exp_tasks; |
51 | /* Pointer to first task blocking the */ | 85 | /* Pointer to first task blocking the */ |
52 | /* current expedited grace period, or NULL */ | 86 | /* current expedited grace period, or NULL */ |
53 | /* if there is no such task. If there */ | 87 | /* if there is no such task. If there */ |
54 | /* is no current expedited grace period, */ | 88 | /* is no current expedited grace period, */ |
55 | /* then there cannot be any such task. */ | 89 | /* then there cannot be any such task. */ |
90 | #ifdef CONFIG_RCU_BOOST | ||
91 | struct list_head *boost_tasks; | ||
92 | /* Pointer to first task that needs to be */ | ||
93 | /* priority-boosted, or NULL if no priority */ | ||
94 | /* boosting is needed. If there is no */ | ||
95 | /* current or expedited grace period, there */ | ||
96 | /* can be no such task. */ | ||
97 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
56 | u8 gpnum; /* Current grace period. */ | 98 | u8 gpnum; /* Current grace period. */ |
57 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | 99 | u8 gpcpu; /* Last grace period blocked by the CPU. */ |
58 | u8 completed; /* Last grace period completed. */ | 100 | u8 completed; /* Last grace period completed. */ |
59 | /* If all three are equal, RCU is idle. */ | 101 | /* If all three are equal, RCU is idle. */ |
102 | #ifdef CONFIG_RCU_BOOST | ||
103 | s8 boosted_this_gp; /* Has boosting already happened? */ | ||
104 | unsigned long boost_time; /* When to start boosting (jiffies) */ | ||
105 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
106 | #ifdef CONFIG_RCU_TRACE | ||
107 | unsigned long n_grace_periods; | ||
108 | #ifdef CONFIG_RCU_BOOST | ||
109 | unsigned long n_tasks_boosted; | ||
110 | unsigned long n_exp_boosts; | ||
111 | unsigned long n_normal_boosts; | ||
112 | unsigned long n_normal_balk_blkd_tasks; | ||
113 | unsigned long n_normal_balk_gp_tasks; | ||
114 | unsigned long n_normal_balk_boost_tasks; | ||
115 | unsigned long n_normal_balk_boosted; | ||
116 | unsigned long n_normal_balk_notyet; | ||
117 | unsigned long n_normal_balk_nos; | ||
118 | unsigned long n_exp_balk_blkd_tasks; | ||
119 | unsigned long n_exp_balk_nos; | ||
120 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
121 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
60 | }; | 122 | }; |
61 | 123 | ||
62 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | 124 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { |
@@ -122,6 +184,210 @@ static int rcu_preempt_gp_in_progress(void) | |||
122 | } | 184 | } |
123 | 185 | ||
124 | /* | 186 | /* |
187 | * Advance a ->blkd_tasks-list pointer to the next entry, returning | ||
188 | * NULL instead if at the end of the list. | ||
189 | */ | ||
190 | static struct list_head *rcu_next_node_entry(struct task_struct *t) | ||
191 | { | ||
192 | struct list_head *np; | ||
193 | |||
194 | np = t->rcu_node_entry.next; | ||
195 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
196 | np = NULL; | ||
197 | return np; | ||
198 | } | ||
199 | |||
200 | #ifdef CONFIG_RCU_TRACE | ||
201 | |||
202 | #ifdef CONFIG_RCU_BOOST | ||
203 | static void rcu_initiate_boost_trace(void); | ||
204 | static void rcu_initiate_exp_boost_trace(void); | ||
205 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
206 | |||
207 | /* | ||
208 | * Dump additional statistics for TINY_PREEMPT_RCU. | ||
209 | */ | ||
210 | static void show_tiny_preempt_stats(struct seq_file *m) | ||
211 | { | ||
212 | seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n", | ||
213 | rcu_preempt_ctrlblk.rcb.qlen, | ||
214 | rcu_preempt_ctrlblk.n_grace_periods, | ||
215 | rcu_preempt_ctrlblk.gpnum, | ||
216 | rcu_preempt_ctrlblk.gpcpu, | ||
217 | rcu_preempt_ctrlblk.completed, | ||
218 | "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)], | ||
219 | "N."[!rcu_preempt_ctrlblk.gp_tasks], | ||
220 | "E."[!rcu_preempt_ctrlblk.exp_tasks]); | ||
221 | #ifdef CONFIG_RCU_BOOST | ||
222 | seq_printf(m, " ttb=%c btg=", | ||
223 | "B."[!rcu_preempt_ctrlblk.boost_tasks]); | ||
224 | switch (rcu_preempt_ctrlblk.boosted_this_gp) { | ||
225 | case -1: | ||
226 | seq_puts(m, "exp"); | ||
227 | break; | ||
228 | case 0: | ||
229 | seq_puts(m, "no"); | ||
230 | break; | ||
231 | case 1: | ||
232 | seq_puts(m, "begun"); | ||
233 | break; | ||
234 | case 2: | ||
235 | seq_puts(m, "done"); | ||
236 | break; | ||
237 | default: | ||
238 | seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); | ||
239 | } | ||
240 | seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", | ||
241 | rcu_preempt_ctrlblk.n_tasks_boosted, | ||
242 | rcu_preempt_ctrlblk.n_exp_boosts, | ||
243 | rcu_preempt_ctrlblk.n_normal_boosts, | ||
244 | (int)(jiffies & 0xffff), | ||
245 | (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); | ||
246 | seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", | ||
247 | "normal balk", | ||
248 | rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, | ||
249 | rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, | ||
250 | rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, | ||
251 | rcu_preempt_ctrlblk.n_normal_balk_boosted, | ||
252 | rcu_preempt_ctrlblk.n_normal_balk_notyet, | ||
253 | rcu_preempt_ctrlblk.n_normal_balk_nos); | ||
254 | seq_printf(m, " exp balk: bt=%lu nos=%lu\n", | ||
255 | rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, | ||
256 | rcu_preempt_ctrlblk.n_exp_balk_nos); | ||
257 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
258 | } | ||
259 | |||
260 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
261 | |||
262 | #ifdef CONFIG_RCU_BOOST | ||
263 | |||
264 | #include "rtmutex_common.h" | ||
265 | |||
266 | /* | ||
267 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | ||
268 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | ||
269 | */ | ||
270 | static int rcu_boost(void) | ||
271 | { | ||
272 | unsigned long flags; | ||
273 | struct rt_mutex mtx; | ||
274 | struct list_head *np; | ||
275 | struct task_struct *t; | ||
276 | |||
277 | if (rcu_preempt_ctrlblk.boost_tasks == NULL) | ||
278 | return 0; /* Nothing to boost. */ | ||
279 | raw_local_irq_save(flags); | ||
280 | rcu_preempt_ctrlblk.boosted_this_gp++; | ||
281 | t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, | ||
282 | rcu_node_entry); | ||
283 | np = rcu_next_node_entry(t); | ||
284 | rt_mutex_init_proxy_locked(&mtx, t); | ||
285 | t->rcu_boost_mutex = &mtx; | ||
286 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; | ||
287 | raw_local_irq_restore(flags); | ||
288 | rt_mutex_lock(&mtx); | ||
289 | RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); | ||
290 | rcu_preempt_ctrlblk.boosted_this_gp++; | ||
291 | rt_mutex_unlock(&mtx); | ||
292 | return rcu_preempt_ctrlblk.boost_tasks != NULL; | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Check to see if it is now time to start boosting RCU readers blocking | ||
297 | * the current grace period, and, if so, tell the rcu_kthread_task to | ||
298 | * start boosting them. If there is an expedited boost in progress, | ||
299 | * we wait for it to complete. | ||
300 | * | ||
301 | * If there are no blocked readers blocking the current grace period, | ||
302 | * return 0 to let the caller know, otherwise return 1. Note that this | ||
303 | * return value is independent of whether or not boosting was done. | ||
304 | */ | ||
305 | static int rcu_initiate_boost(void) | ||
306 | { | ||
307 | if (!rcu_preempt_blocked_readers_cgp()) { | ||
308 | RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); | ||
309 | return 0; | ||
310 | } | ||
311 | if (rcu_preempt_ctrlblk.gp_tasks != NULL && | ||
312 | rcu_preempt_ctrlblk.boost_tasks == NULL && | ||
313 | rcu_preempt_ctrlblk.boosted_this_gp == 0 && | ||
314 | ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { | ||
315 | rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; | ||
316 | invoke_rcu_kthread(); | ||
317 | RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); | ||
318 | } else | ||
319 | RCU_TRACE(rcu_initiate_boost_trace()); | ||
320 | return 1; | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Initiate boosting for an expedited grace period. | ||
325 | */ | ||
326 | static void rcu_initiate_expedited_boost(void) | ||
327 | { | ||
328 | unsigned long flags; | ||
329 | |||
330 | raw_local_irq_save(flags); | ||
331 | if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { | ||
332 | rcu_preempt_ctrlblk.boost_tasks = | ||
333 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
334 | rcu_preempt_ctrlblk.boosted_this_gp = -1; | ||
335 | invoke_rcu_kthread(); | ||
336 | RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); | ||
337 | } else | ||
338 | RCU_TRACE(rcu_initiate_exp_boost_trace()); | ||
339 | raw_local_irq_restore(flags); | ||
340 | } | ||
341 | |||
342 | #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) | ||
343 | |||
344 | /* | ||
345 | * Do priority-boost accounting for the start of a new grace period. | ||
346 | */ | ||
347 | static void rcu_preempt_boost_start_gp(void) | ||
348 | { | ||
349 | rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; | ||
350 | if (rcu_preempt_ctrlblk.boosted_this_gp > 0) | ||
351 | rcu_preempt_ctrlblk.boosted_this_gp = 0; | ||
352 | } | ||
353 | |||
354 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
355 | |||
356 | /* | ||
357 | * If there is no RCU priority boosting, we don't boost. | ||
358 | */ | ||
359 | static int rcu_boost(void) | ||
360 | { | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * If there is no RCU priority boosting, we don't initiate boosting, | ||
366 | * but we do indicate whether there are blocked readers blocking the | ||
367 | * current grace period. | ||
368 | */ | ||
369 | static int rcu_initiate_boost(void) | ||
370 | { | ||
371 | return rcu_preempt_blocked_readers_cgp(); | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * If there is no RCU priority boosting, we don't initiate expedited boosting. | ||
376 | */ | ||
377 | static void rcu_initiate_expedited_boost(void) | ||
378 | { | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * If there is no RCU priority boosting, nothing to do at grace-period start. | ||
383 | */ | ||
384 | static void rcu_preempt_boost_start_gp(void) | ||
385 | { | ||
386 | } | ||
387 | |||
388 | #endif /* else #ifdef CONFIG_RCU_BOOST */ | ||
389 | |||
390 | /* | ||
125 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | 391 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
126 | * that this just means that the task currently running on the CPU is | 392 | * that this just means that the task currently running on the CPU is |
127 | * in a quiescent state. There might be any number of tasks blocked | 393 | * in a quiescent state. There might be any number of tasks blocked |
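
rcu_boost() above rests on priority inheritance: the booster proxy-locks an on-stack rt_mutex on behalf of the preempted reader, then blocks on that mutex itself, lending the reader its priority until the reader unlocks in rcu_read_unlock_special(). A simplified sketch; irq masking and the ->boost_tasks bookkeeping are omitted, and boost_reader() is illustrative:

static void boost_reader(struct task_struct *t)
{
	struct rt_mutex mtx;	/* lives on the booster's stack, as above */

	rt_mutex_init_proxy_locked(&mtx, t);	/* t is made the mutex owner */
	t->rcu_boost_mutex = &mtx;
	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
	rt_mutex_lock(&mtx);	/* we block; PI boosts t to our priority */
	/* t eventually runs rcu_read_unlock_special(), which unlocks mtx
	 * and deboosts t; only then can we return and reuse the stack. */
	rt_mutex_unlock(&mtx);
}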
@@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void) | |||
148 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | 414 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; |
149 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 415 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
150 | 416 | ||
417 | /* If there is no GP then there is nothing more to do. */ | ||
418 | if (!rcu_preempt_gp_in_progress()) | ||
419 | return; | ||
151 | /* | 420 | /* |
152 | * If there is no GP, or if blocked readers are still blocking GP, | 421 | * Check up on boosting. If there are no readers blocking the |
153 | * then there is nothing more to do. | 422 | * current grace period, leave. |
154 | */ | 423 | */ |
155 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | 424 | if (rcu_initiate_boost()) |
156 | return; | 425 | return; |
157 | 426 | ||
158 | /* Advance callbacks. */ | 427 | /* Advance callbacks. */ |
@@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void) | |||
164 | if (!rcu_preempt_blocked_readers_any()) | 433 | if (!rcu_preempt_blocked_readers_any()) |
165 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | 434 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; |
166 | 435 | ||
167 | /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | 436 | /* If there are done callbacks, cause them to be invoked. */ |
168 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 437 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
169 | raise_softirq(RCU_SOFTIRQ); | 438 | invoke_rcu_kthread(); |
170 | } | 439 | } |
171 | 440 | ||
172 | /* | 441 | /* |
@@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void) | |||
178 | 447 | ||
179 | /* Official start of GP. */ | 448 | /* Official start of GP. */ |
180 | rcu_preempt_ctrlblk.gpnum++; | 449 | rcu_preempt_ctrlblk.gpnum++; |
450 | RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); | ||
181 | 451 | ||
182 | /* Any blocked RCU readers block new GP. */ | 452 | /* Any blocked RCU readers block new GP. */ |
183 | if (rcu_preempt_blocked_readers_any()) | 453 | if (rcu_preempt_blocked_readers_any()) |
184 | rcu_preempt_ctrlblk.gp_tasks = | 454 | rcu_preempt_ctrlblk.gp_tasks = |
185 | rcu_preempt_ctrlblk.blkd_tasks.next; | 455 | rcu_preempt_ctrlblk.blkd_tasks.next; |
186 | 456 | ||
457 | /* Set up for RCU priority boosting. */ | ||
458 | rcu_preempt_boost_start_gp(); | ||
459 | |||
187 | /* If there is no running reader, CPU is done with GP. */ | 460 | /* If there is no running reader, CPU is done with GP. */ |
188 | if (!rcu_preempt_running_reader()) | 461 | if (!rcu_preempt_running_reader()) |
189 | rcu_preempt_cpu_qs(); | 462 | rcu_preempt_cpu_qs(); |
@@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
304 | */ | 577 | */ |
305 | empty = !rcu_preempt_blocked_readers_cgp(); | 578 | empty = !rcu_preempt_blocked_readers_cgp(); |
306 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | 579 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; |
307 | np = t->rcu_node_entry.next; | 580 | np = rcu_next_node_entry(t); |
308 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
309 | np = NULL; | ||
310 | list_del(&t->rcu_node_entry); | 581 | list_del(&t->rcu_node_entry); |
311 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | 582 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) |
312 | rcu_preempt_ctrlblk.gp_tasks = np; | 583 | rcu_preempt_ctrlblk.gp_tasks = np; |
313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | 584 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) |
314 | rcu_preempt_ctrlblk.exp_tasks = np; | 585 | rcu_preempt_ctrlblk.exp_tasks = np; |
586 | #ifdef CONFIG_RCU_BOOST | ||
587 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) | ||
588 | rcu_preempt_ctrlblk.boost_tasks = np; | ||
589 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
315 | INIT_LIST_HEAD(&t->rcu_node_entry); | 590 | INIT_LIST_HEAD(&t->rcu_node_entry); |
316 | 591 | ||
317 | /* | 592 | /* |
@@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
331 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | 606 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) |
332 | rcu_report_exp_done(); | 607 | rcu_report_exp_done(); |
333 | } | 608 | } |
609 | #ifdef CONFIG_RCU_BOOST | ||
610 | /* Unboost self if was boosted. */ | ||
611 | if (special & RCU_READ_UNLOCK_BOOSTED) { | ||
612 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; | ||
613 | rt_mutex_unlock(t->rcu_boost_mutex); | ||
614 | t->rcu_boost_mutex = NULL; | ||
615 | } | ||
616 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
334 | local_irq_restore(flags); | 617 | local_irq_restore(flags); |
335 | } | 618 | } |
336 | 619 | ||
@@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void) | |||
374 | rcu_preempt_cpu_qs(); | 657 | rcu_preempt_cpu_qs(); |
375 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 658 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
376 | rcu_preempt_ctrlblk.rcb.donetail) | 659 | rcu_preempt_ctrlblk.rcb.donetail) |
377 | raise_softirq(RCU_SOFTIRQ); | 660 | invoke_rcu_kthread(); |
378 | if (rcu_preempt_gp_in_progress() && | 661 | if (rcu_preempt_gp_in_progress() && |
379 | rcu_cpu_blocking_cur_gp() && | 662 | rcu_cpu_blocking_cur_gp() && |
380 | rcu_preempt_running_reader()) | 663 | rcu_preempt_running_reader()) |
@@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void) | |||
383 | 666 | ||
384 | /* | 667 | /* |
385 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | 668 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to |
386 | * update, so this is invoked from __rcu_process_callbacks() to | 669 | * update, so this is invoked from rcu_process_callbacks() to |
387 | * handle that case. Of course, it is invoked for all flavors of | 670 | * handle that case. Of course, it is invoked for all flavors of |
388 | * RCU, but RCU callbacks can appear only on one of the lists, and | 671 | * RCU, but RCU callbacks can appear only on one of the lists, and |
389 | * neither ->nexttail nor ->donetail can possibly be NULL, so there | 672 | * neither ->nexttail nor ->donetail can possibly be NULL, so there |
@@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |||
400 | */ | 683 | */ |
401 | static void rcu_preempt_process_callbacks(void) | 684 | static void rcu_preempt_process_callbacks(void) |
402 | { | 685 | { |
403 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | 686 | rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); |
404 | } | 687 | } |
405 | 688 | ||
406 | /* | 689 | /* |
@@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
417 | local_irq_save(flags); | 700 | local_irq_save(flags); |
418 | *rcu_preempt_ctrlblk.nexttail = head; | 701 | *rcu_preempt_ctrlblk.nexttail = head; |
419 | rcu_preempt_ctrlblk.nexttail = &head->next; | 702 | rcu_preempt_ctrlblk.nexttail = &head->next; |
703 | RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++); | ||
420 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ | 704 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ |
421 | local_irq_restore(flags); | 705 | local_irq_restore(flags); |
422 | } | 706 | } |
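The enqueue that the new RCU_TRACE() line instruments uses the indirect-tail idiom: ->nexttail always addresses the pointer through which the next callback must be linked, so insertion is two stores with no list traversal. A minimal standalone model (simplified; the real code runs with irqs disabled):

    struct rcu_head { struct rcu_head *next; void (*func)(struct rcu_head *); };

    struct cblist {
        struct rcu_head *head;
        struct rcu_head **tail;   /* &head when empty, else &last->next */
    };

    static void enqueue(struct cblist *l, struct rcu_head *rh)
    {
        rh->next = NULL;
        *l->tail = rh;            /* link after the current last element */
        l->tail = &rh->next;      /* tail now addresses the new ->next */
    }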
@@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void) | |||
532 | 816 | ||
533 | /* Wait for tail of ->blkd_tasks list to drain. */ | 817 | /* Wait for tail of ->blkd_tasks list to drain. */ |
534 | if (rcu_preempted_readers_exp()) | 818 | if (rcu_preempted_readers_exp()) |
819 | rcu_initiate_expedited_boost(); | ||
535 | wait_event(sync_rcu_preempt_exp_wq, | 820 | wait_event(sync_rcu_preempt_exp_wq, |
536 | !rcu_preempted_readers_exp()); | 821 | !rcu_preempted_readers_exp()); |
537 | 822 | ||
@@ -572,6 +857,27 @@ void exit_rcu(void) | |||
572 | 857 | ||
573 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
574 | 859 | ||
860 | #ifdef CONFIG_RCU_TRACE | ||
861 | |||
862 | /* | ||
863 | * Because preemptible RCU does not exist, it is not necessary to | ||
864 | * dump out its statistics. | ||
865 | */ | ||
866 | static void show_tiny_preempt_stats(struct seq_file *m) | ||
867 | { | ||
868 | } | ||
869 | |||
870 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
871 | |||
872 | /* | ||
873 | * Because preemptible RCU does not exist, it is never necessary to | ||
874 | * boost preempted RCU readers. | ||
875 | */ | ||
876 | static int rcu_boost(void) | ||
877 | { | ||
878 | return 0; | ||
879 | } | ||
880 | |||
575 | /* | 881 | /* |
576 | * Because preemptible RCU does not exist, it never has any callbacks | 882 | * Because preemptible RCU does not exist, it never has any callbacks |
577 | * to check. | 883 | * to check. |
@@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void) | |||
599 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | 905 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ |
600 | 906 | ||
601 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 907 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
602 | |||
603 | #include <linux/kernel_stat.h> | 908 | #include <linux/kernel_stat.h> |
604 | 909 | ||
605 | /* | 910 | /* |
606 | * During boot, we forgive RCU lockdep issues. After this function is | 911 | * During boot, we forgive RCU lockdep issues. After this function is |
607 | * invoked, we start taking RCU lockdep issues seriously. | 912 | * invoked, we start taking RCU lockdep issues seriously. |
608 | */ | 913 | */ |
609 | void rcu_scheduler_starting(void) | 914 | void __init rcu_scheduler_starting(void) |
610 | { | 915 | { |
611 | WARN_ON(nr_context_switches() > 0); | 916 | WARN_ON(nr_context_switches() > 0); |
612 | rcu_scheduler_active = 1; | 917 | rcu_scheduler_active = 1; |
613 | } | 918 | } |
614 | 919 | ||
615 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 920 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
921 | |||
922 | #ifdef CONFIG_RCU_BOOST | ||
923 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
924 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
925 | #define RCU_BOOST_PRIO 1 | ||
926 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
927 | |||
928 | #ifdef CONFIG_RCU_TRACE | ||
929 | |||
930 | #ifdef CONFIG_RCU_BOOST | ||
931 | |||
932 | static void rcu_initiate_boost_trace(void) | ||
933 | { | ||
934 | if (rcu_preempt_ctrlblk.gp_tasks == NULL) | ||
935 | rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; | ||
936 | else if (rcu_preempt_ctrlblk.boost_tasks != NULL) | ||
937 | rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; | ||
938 | else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) | ||
939 | rcu_preempt_ctrlblk.n_normal_balk_boosted++; | ||
940 | else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) | ||
941 | rcu_preempt_ctrlblk.n_normal_balk_notyet++; | ||
942 | else | ||
943 | rcu_preempt_ctrlblk.n_normal_balk_nos++; | ||
944 | } | ||
945 | |||
946 | static void rcu_initiate_exp_boost_trace(void) | ||
947 | { | ||
948 | if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) | ||
949 | rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; | ||
950 | else | ||
951 | rcu_preempt_ctrlblk.n_exp_balk_nos++; | ||
952 | } | ||
953 | |||
954 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
955 | |||
956 | static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) | ||
957 | { | ||
958 | unsigned long flags; | ||
959 | |||
960 | raw_local_irq_save(flags); | ||
961 | rcp->qlen -= n; | ||
962 | raw_local_irq_restore(flags); | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Dump statistics for TINY_RCU, such as they are. | ||
967 | */ | ||
968 | static int show_tiny_stats(struct seq_file *m, void *unused) | ||
969 | { | ||
970 | show_tiny_preempt_stats(m); | ||
971 | seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); | ||
972 | seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); | ||
973 | return 0; | ||
974 | } | ||
975 | |||
976 | static int show_tiny_stats_open(struct inode *inode, struct file *file) | ||
977 | { | ||
978 | return single_open(file, show_tiny_stats, NULL); | ||
979 | } | ||
980 | |||
981 | static const struct file_operations show_tiny_stats_fops = { | ||
982 | .owner = THIS_MODULE, | ||
983 | .open = show_tiny_stats_open, | ||
984 | .read = seq_read, | ||
985 | .llseek = seq_lseek, | ||
986 | .release = single_release, | ||
987 | }; | ||
988 | |||
989 | static struct dentry *rcudir; | ||
990 | |||
991 | static int __init rcutiny_trace_init(void) | ||
992 | { | ||
993 | struct dentry *retval; | ||
994 | |||
995 | rcudir = debugfs_create_dir("rcu", NULL); | ||
996 | if (!rcudir) | ||
997 | goto free_out; | ||
998 | retval = debugfs_create_file("rcudata", 0444, rcudir, | ||
999 | NULL, &show_tiny_stats_fops); | ||
1000 | if (!retval) | ||
1001 | goto free_out; | ||
1002 | return 0; | ||
1003 | free_out: | ||
1004 | debugfs_remove_recursive(rcudir); | ||
1005 | return 1; | ||
1006 | } | ||
1007 | |||
1008 | static void __exit rcutiny_trace_cleanup(void) | ||
1009 | { | ||
1010 | debugfs_remove_recursive(rcudir); | ||
1011 | } | ||
1012 | |||
1013 | module_init(rcutiny_trace_init); | ||
1014 | module_exit(rcutiny_trace_cleanup); | ||
1015 | |||
1016 | MODULE_AUTHOR("Paul E. McKenney"); | ||
1017 | MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); | ||
1018 | MODULE_LICENSE("GPL"); | ||
1019 | |||
1020 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
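Given the seq_printf() calls in show_tiny_stats(), reading the new debugfs file should produce output shaped roughly as follows (queue lengths illustrative, debugfs assumed mounted in the usual place; a TINY_PREEMPT_RCU build would emit an rcu_preempt line first via show_tiny_preempt_stats()):

    $ cat /sys/kernel/debug/rcu/rcudata
    rcu_sched: qlen: 3
    rcu_bh: qlen: 0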
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9d8e8fb2515f..89613f97ff26 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
49 | #include <asm/byteorder.h> | 49 | #include <asm/byteorder.h> |
50 | #include <linux/sched.h> | ||
50 | 51 | ||
51 | MODULE_LICENSE("GPL"); | 52 | MODULE_LICENSE("GPL"); |
52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 53 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
@@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ | |||
64 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ | 65 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ |
65 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ | 66 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ |
66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 67 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
68 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ | ||
69 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ | ||
70 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ | ||
67 | static char *torture_type = "rcu"; /* What RCU implementation to torture. */ | 71 | static char *torture_type = "rcu"; /* What RCU implementation to torture. */ |
68 | 72 | ||
69 | module_param(nreaders, int, 0444); | 73 | module_param(nreaders, int, 0444); |
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444); | |||
88 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 92 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
89 | module_param(fqs_stutter, int, 0444); | 93 | module_param(fqs_stutter, int, 0444); |
90 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 94 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
95 | module_param(test_boost, int, 0444); | ||
96 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); | ||
97 | module_param(test_boost_interval, int, 0444); | ||
98 | MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); | ||
99 | module_param(test_boost_duration, int, 0444); | ||
100 | MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); | ||
91 | module_param(torture_type, charp, 0444); | 101 | module_param(torture_type, charp, 0444); |
92 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); | 102 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); |
93 | 103 | ||
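For example, to force boost testing even for a torture_type whose ops do not set can_boost (test_boost=2 means "yes"; the interval and duration shown simply restate the declared defaults):

    modprobe rcutorture test_boost=2 test_boost_interval=7 test_boost_duration=4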
@@ -109,6 +119,7 @@ static struct task_struct *stats_task; | |||
109 | static struct task_struct *shuffler_task; | 119 | static struct task_struct *shuffler_task; |
110 | static struct task_struct *stutter_task; | 120 | static struct task_struct *stutter_task; |
111 | static struct task_struct *fqs_task; | 121 | static struct task_struct *fqs_task; |
122 | static struct task_struct *boost_tasks[NR_CPUS]; | ||
112 | 123 | ||
113 | #define RCU_TORTURE_PIPE_LEN 10 | 124 | #define RCU_TORTURE_PIPE_LEN 10 |
114 | 125 | ||
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail; | |||
134 | static atomic_t n_rcu_torture_free; | 145 | static atomic_t n_rcu_torture_free; |
135 | static atomic_t n_rcu_torture_mberror; | 146 | static atomic_t n_rcu_torture_mberror; |
136 | static atomic_t n_rcu_torture_error; | 147 | static atomic_t n_rcu_torture_error; |
148 | static long n_rcu_torture_boost_ktrerror; | ||
149 | static long n_rcu_torture_boost_rterror; | ||
150 | static long n_rcu_torture_boost_allocerror; | ||
151 | static long n_rcu_torture_boost_afferror; | ||
152 | static long n_rcu_torture_boost_failure; | ||
153 | static long n_rcu_torture_boosts; | ||
137 | static long n_rcu_torture_timers; | 154 | static long n_rcu_torture_timers; |
138 | static struct list_head rcu_torture_removed; | 155 | static struct list_head rcu_torture_removed; |
139 | static cpumask_var_t shuffle_tmp_mask; | 156 | static cpumask_var_t shuffle_tmp_mask; |
@@ -147,6 +164,16 @@ static int stutter_pause_test; | |||
147 | #endif | 164 | #endif |
148 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | 165 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; |
149 | 166 | ||
167 | #ifdef CONFIG_RCU_BOOST | ||
168 | #define rcu_can_boost() 1 | ||
169 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
170 | #define rcu_can_boost() 0 | ||
171 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
172 | |||
173 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | ||
174 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | ||
175 | /* and boost task create/destroy. */ | ||
176 | |||
150 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | 177 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ |
151 | 178 | ||
152 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ | 179 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ |
@@ -277,6 +304,7 @@ struct rcu_torture_ops { | |||
277 | void (*fqs)(void); | 304 | void (*fqs)(void); |
278 | int (*stats)(char *page); | 305 | int (*stats)(char *page); |
279 | int irq_capable; | 306 | int irq_capable; |
307 | int can_boost; | ||
280 | char *name; | 308 | char *name; |
281 | }; | 309 | }; |
282 | 310 | ||
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = { | |||
366 | .fqs = rcu_force_quiescent_state, | 394 | .fqs = rcu_force_quiescent_state, |
367 | .stats = NULL, | 395 | .stats = NULL, |
368 | .irq_capable = 1, | 396 | .irq_capable = 1, |
397 | .can_boost = rcu_can_boost(), | ||
369 | .name = "rcu" | 398 | .name = "rcu" |
370 | }; | 399 | }; |
371 | 400 | ||
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = { | |||
408 | .fqs = rcu_force_quiescent_state, | 437 | .fqs = rcu_force_quiescent_state, |
409 | .stats = NULL, | 438 | .stats = NULL, |
410 | .irq_capable = 1, | 439 | .irq_capable = 1, |
440 | .can_boost = rcu_can_boost(), | ||
411 | .name = "rcu_sync" | 441 | .name = "rcu_sync" |
412 | }; | 442 | }; |
413 | 443 | ||
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { | |||
424 | .fqs = rcu_force_quiescent_state, | 454 | .fqs = rcu_force_quiescent_state, |
425 | .stats = NULL, | 455 | .stats = NULL, |
426 | .irq_capable = 1, | 456 | .irq_capable = 1, |
457 | .can_boost = rcu_can_boost(), | ||
427 | .name = "rcu_expedited" | 458 | .name = "rcu_expedited" |
428 | }; | 459 | }; |
429 | 460 | ||
@@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
684 | }; | 715 | }; |
685 | 716 | ||
686 | /* | 717 | /* |
718 | * RCU torture priority-boost testing. Runs one real-time thread per | ||
719 | * CPU for moderate bursts, repeatedly registering RCU callbacks and | ||
720 | * spinning waiting for them to be invoked. If a given callback takes | ||
721 | * too long to be invoked, we assume that priority inversion has occurred. | ||
722 | */ | ||
723 | |||
724 | struct rcu_boost_inflight { | ||
725 | struct rcu_head rcu; | ||
726 | int inflight; | ||
727 | }; | ||
728 | |||
729 | static void rcu_torture_boost_cb(struct rcu_head *head) | ||
730 | { | ||
731 | struct rcu_boost_inflight *rbip = | ||
732 | container_of(head, struct rcu_boost_inflight, rcu); | ||
733 | |||
734 | smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ | ||
735 | rbip->inflight = 0; | ||
736 | } | ||
737 | |||
738 | static int rcu_torture_boost(void *arg) | ||
739 | { | ||
740 | unsigned long call_rcu_time; | ||
741 | unsigned long endtime; | ||
742 | unsigned long oldstarttime; | ||
743 | struct rcu_boost_inflight rbi = { .inflight = 0 }; | ||
744 | struct sched_param sp; | ||
745 | |||
746 | VERBOSE_PRINTK_STRING("rcu_torture_boost started"); | ||
747 | |||
748 | /* Set real-time priority. */ | ||
749 | sp.sched_priority = 1; | ||
750 | if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { | ||
751 | VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); | ||
752 | n_rcu_torture_boost_rterror++; | ||
753 | } | ||
754 | |||
755 | /* Each pass through the following loop does one boost-test cycle. */ | ||
756 | do { | ||
757 | /* Wait for the next test interval. */ | ||
758 | oldstarttime = boost_starttime; | ||
759 | while (jiffies - oldstarttime > ULONG_MAX / 2) { | ||
760 | schedule_timeout_uninterruptible(1); | ||
761 | rcu_stutter_wait("rcu_torture_boost"); | ||
762 | if (kthread_should_stop() || | ||
763 | fullstop != FULLSTOP_DONTSTOP) | ||
764 | goto checkwait; | ||
765 | } | ||
766 | |||
767 | /* Do one boost-test interval. */ | ||
768 | endtime = oldstarttime + test_boost_duration * HZ; | ||
769 | call_rcu_time = jiffies; | ||
770 | while (jiffies - endtime > ULONG_MAX / 2) { | ||
771 | /* If we don't have a callback in flight, post one. */ | ||
772 | if (!rbi.inflight) { | ||
773 | smp_mb(); /* RCU core before ->inflight = 1. */ | ||
774 | rbi.inflight = 1; | ||
775 | call_rcu(&rbi.rcu, rcu_torture_boost_cb); | ||
776 | if (jiffies - call_rcu_time > | ||
777 | test_boost_duration * HZ - HZ / 2) { | ||
778 | VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); | ||
779 | n_rcu_torture_boost_failure++; | ||
780 | } | ||
781 | call_rcu_time = jiffies; | ||
782 | } | ||
783 | cond_resched(); | ||
784 | rcu_stutter_wait("rcu_torture_boost"); | ||
785 | if (kthread_should_stop() || | ||
786 | fullstop != FULLSTOP_DONTSTOP) | ||
787 | goto checkwait; | ||
788 | } | ||
789 | |||
790 | /* | ||
791 | * Set the start time of the next test interval. | ||
792 | * Yes, this is vulnerable to long delays, but such | ||
793 | * delays simply cause a false negative for the next | ||
794 | * interval. Besides, we are running at RT priority, | ||
795 | * so delays should be relatively rare. | ||
796 | */ | ||
797 | while (oldstarttime == boost_starttime) { | ||
798 | if (mutex_trylock(&boost_mutex)) { | ||
799 | boost_starttime = jiffies + | ||
800 | test_boost_interval * HZ; | ||
801 | n_rcu_torture_boosts++; | ||
802 | mutex_unlock(&boost_mutex); | ||
803 | break; | ||
804 | } | ||
805 | schedule_timeout_uninterruptible(1); | ||
806 | } | ||
807 | |||
808 | /* Go do the stutter. */ | ||
809 | checkwait: rcu_stutter_wait("rcu_torture_boost"); | ||
810 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
811 | |||
812 | /* Clean up and exit. */ | ||
813 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | ||
814 | rcutorture_shutdown_absorb("rcu_torture_boost"); | ||
815 | while (!kthread_should_stop() || rbi.inflight) | ||
816 | schedule_timeout_uninterruptible(1); | ||
817 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | ||
818 | return 0; | ||
819 | } | ||
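Both "jiffies - X > ULONG_MAX / 2" loops above are wraparound-safe "not yet reached X" tests: the unsigned subtraction lands in the top half of the range exactly when jiffies is still before the target. Written out, the predicate matches the time_before() convention of include/linux/jiffies.h:

    /* True iff timestamp a is earlier than b, modulo wraparound. */
    static inline int before(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0;
    }
    /* So the waits read as:      while (before(jiffies, oldstarttime)) ...
     * and the test interval as:  while (before(jiffies, endtime)) ... */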
820 | |||
821 | /* | ||
687 | * RCU torture force-quiescent-state kthread. Repeatedly induces | 822 | * RCU torture force-quiescent-state kthread. Repeatedly induces |
688 | * bursts of calls to force_quiescent_state(), increasing the probability | 823 | * bursts of calls to force_quiescent_state(), increasing the probability |
689 | * of occurrence of some important types of race conditions. | 824 | * of occurrence of some important types of race conditions. |
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page) | |||
933 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); | 1068 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); |
934 | cnt += sprintf(&page[cnt], | 1069 | cnt += sprintf(&page[cnt], |
935 | "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " | 1070 | "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " |
936 | "rtmbe: %d nt: %ld", | 1071 | "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " |
1072 | "rtbf: %ld rtb: %ld nt: %ld", | ||
937 | rcu_torture_current, | 1073 | rcu_torture_current, |
938 | rcu_torture_current_version, | 1074 | rcu_torture_current_version, |
939 | list_empty(&rcu_torture_freelist), | 1075 | list_empty(&rcu_torture_freelist), |
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page) | |||
941 | atomic_read(&n_rcu_torture_alloc_fail), | 1077 | atomic_read(&n_rcu_torture_alloc_fail), |
942 | atomic_read(&n_rcu_torture_free), | 1078 | atomic_read(&n_rcu_torture_free), |
943 | atomic_read(&n_rcu_torture_mberror), | 1079 | atomic_read(&n_rcu_torture_mberror), |
1080 | n_rcu_torture_boost_ktrerror, | ||
1081 | n_rcu_torture_boost_rterror, | ||
1082 | n_rcu_torture_boost_allocerror, | ||
1083 | n_rcu_torture_boost_afferror, | ||
1084 | n_rcu_torture_boost_failure, | ||
1085 | n_rcu_torture_boosts, | ||
944 | n_rcu_torture_timers); | 1086 | n_rcu_torture_timers); |
945 | if (atomic_read(&n_rcu_torture_mberror) != 0) | 1087 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
1088 | n_rcu_torture_boost_ktrerror != 0 || | ||
1089 | n_rcu_torture_boost_rterror != 0 || | ||
1090 | n_rcu_torture_boost_allocerror != 0 || | ||
1091 | n_rcu_torture_boost_afferror != 0 || | ||
1092 | n_rcu_torture_boost_failure != 0) | ||
946 | cnt += sprintf(&page[cnt], " !!!"); | 1093 | cnt += sprintf(&page[cnt], " !!!"); |
947 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | 1094 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); |
948 | if (i > 1) { | 1095 | if (i > 1) { |
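For reference, the new fields in the status line map to the boost counters in argument order: rtbke = n_rcu_torture_boost_ktrerror (kthread-creation failures), rtbre = n_rcu_torture_boost_rterror (failures to set RT priority), rtbae = n_rcu_torture_boost_allocerror, rtbafe = n_rcu_torture_boost_afferror, rtbf = n_rcu_torture_boost_failure (callbacks that outlived the boost interval, the actual test failures), and rtb = n_rcu_torture_boosts (boost-test intervals completed). Any nonzero error counter now also triggers the "!!!" marker.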
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg) | |||
1094 | } | 1241 | } |
1095 | 1242 | ||
1096 | static inline void | 1243 | static inline void |
1097 | rcu_torture_print_module_parms(char *tag) | 1244 | rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) |
1098 | { | 1245 | { |
1099 | printk(KERN_ALERT "%s" TORTURE_FLAG | 1246 | printk(KERN_ALERT "%s" TORTURE_FLAG |
1100 | "--- %s: nreaders=%d nfakewriters=%d " | 1247 | "--- %s: nreaders=%d nfakewriters=%d " |
1101 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " | 1248 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " |
1102 | "shuffle_interval=%d stutter=%d irqreader=%d " | 1249 | "shuffle_interval=%d stutter=%d irqreader=%d " |
1103 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", | 1250 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " |
1251 | "test_boost=%d/%d test_boost_interval=%d " | ||
1252 | "test_boost_duration=%d\n", | ||
1104 | torture_type, tag, nrealreaders, nfakewriters, | 1253 | torture_type, tag, nrealreaders, nfakewriters, |
1105 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, | 1254 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, |
1106 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); | 1255 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, |
1256 | test_boost, cur_ops->can_boost, | ||
1257 | test_boost_interval, test_boost_duration); | ||
1107 | } | 1258 | } |
1108 | 1259 | ||
1109 | static struct notifier_block rcutorture_nb = { | 1260 | static struct notifier_block rcutorture_shutdown_nb = { |
1110 | .notifier_call = rcutorture_shutdown_notify, | 1261 | .notifier_call = rcutorture_shutdown_notify, |
1111 | }; | 1262 | }; |
1112 | 1263 | ||
1264 | static void rcutorture_booster_cleanup(int cpu) | ||
1265 | { | ||
1266 | struct task_struct *t; | ||
1267 | |||
1268 | if (boost_tasks[cpu] == NULL) | ||
1269 | return; | ||
1270 | mutex_lock(&boost_mutex); | ||
1271 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); | ||
1272 | t = boost_tasks[cpu]; | ||
1273 | boost_tasks[cpu] = NULL; | ||
1274 | mutex_unlock(&boost_mutex); | ||
1275 | |||
1276 | /* This must be outside of the mutex, otherwise deadlock! */ | ||
1277 | kthread_stop(t); | ||
1278 | } | ||
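The "outside of the mutex" requirement is load-bearing: rcu_torture_boost() can spin in its mutex_trylock()/sleep loop waiting to advance boost_starttime without ever checking kthread_should_stop(), so a kthread_stop() issued while boost_mutex is still held would wait forever on a thread that can never take the mutex.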
1279 | |||
1280 | static int rcutorture_booster_init(int cpu) | ||
1281 | { | ||
1282 | int retval; | ||
1283 | |||
1284 | if (boost_tasks[cpu] != NULL) | ||
1285 | return 0; /* Already created, nothing more to do. */ | ||
1286 | |||
1287 | /* Don't allow time recalculation while creating a new task. */ | ||
1288 | mutex_lock(&boost_mutex); | ||
1289 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | ||
1290 | boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | ||
1291 | "rcu_torture_boost"); | ||
1292 | if (IS_ERR(boost_tasks[cpu])) { | ||
1293 | retval = PTR_ERR(boost_tasks[cpu]); | ||
1294 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | ||
1295 | n_rcu_torture_boost_ktrerror++; | ||
1296 | boost_tasks[cpu] = NULL; | ||
1297 | mutex_unlock(&boost_mutex); | ||
1298 | return retval; | ||
1299 | } | ||
1300 | kthread_bind(boost_tasks[cpu], cpu); | ||
1301 | wake_up_process(boost_tasks[cpu]); | ||
1302 | mutex_unlock(&boost_mutex); | ||
1303 | return 0; | ||
1304 | } | ||
1305 | |||
1306 | static int rcutorture_cpu_notify(struct notifier_block *self, | ||
1307 | unsigned long action, void *hcpu) | ||
1308 | { | ||
1309 | long cpu = (long)hcpu; | ||
1310 | |||
1311 | switch (action) { | ||
1312 | case CPU_ONLINE: | ||
1313 | case CPU_DOWN_FAILED: | ||
1314 | (void)rcutorture_booster_init(cpu); | ||
1315 | break; | ||
1316 | case CPU_DOWN_PREPARE: | ||
1317 | rcutorture_booster_cleanup(cpu); | ||
1318 | break; | ||
1319 | default: | ||
1320 | break; | ||
1321 | } | ||
1322 | return NOTIFY_OK; | ||
1323 | } | ||
1324 | |||
1325 | static struct notifier_block rcutorture_cpu_nb = { | ||
1326 | .notifier_call = rcutorture_cpu_notify, | ||
1327 | }; | ||
1328 | |||
1113 | static void | 1329 | static void |
1114 | rcu_torture_cleanup(void) | 1330 | rcu_torture_cleanup(void) |
1115 | { | 1331 | { |
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void) | |||
1127 | } | 1343 | } |
1128 | fullstop = FULLSTOP_RMMOD; | 1344 | fullstop = FULLSTOP_RMMOD; |
1129 | mutex_unlock(&fullstop_mutex); | 1345 | mutex_unlock(&fullstop_mutex); |
1130 | unregister_reboot_notifier(&rcutorture_nb); | 1346 | unregister_reboot_notifier(&rcutorture_shutdown_nb); |
1131 | if (stutter_task) { | 1347 | if (stutter_task) { |
1132 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); | 1348 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); |
1133 | kthread_stop(stutter_task); | 1349 | kthread_stop(stutter_task); |
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void) | |||
1184 | kthread_stop(fqs_task); | 1400 | kthread_stop(fqs_task); |
1185 | } | 1401 | } |
1186 | fqs_task = NULL; | 1402 | fqs_task = NULL; |
1403 | if ((test_boost == 1 && cur_ops->can_boost) || | ||
1404 | test_boost == 2) { | ||
1405 | unregister_cpu_notifier(&rcutorture_cpu_nb); | ||
1406 | for_each_possible_cpu(i) | ||
1407 | rcutorture_booster_cleanup(i); | ||
1408 | } | ||
1187 | 1409 | ||
1188 | /* Wait for all RCU callbacks to fire. */ | 1410 | /* Wait for all RCU callbacks to fire. */ |
1189 | 1411 | ||
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void) | |||
1195 | if (cur_ops->cleanup) | 1417 | if (cur_ops->cleanup) |
1196 | cur_ops->cleanup(); | 1418 | cur_ops->cleanup(); |
1197 | if (atomic_read(&n_rcu_torture_error)) | 1419 | if (atomic_read(&n_rcu_torture_error)) |
1198 | rcu_torture_print_module_parms("End of test: FAILURE"); | 1420 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); |
1199 | else | 1421 | else |
1200 | rcu_torture_print_module_parms("End of test: SUCCESS"); | 1422 | rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); |
1201 | } | 1423 | } |
1202 | 1424 | ||
1203 | static int __init | 1425 | static int __init |
@@ -1242,7 +1464,7 @@ rcu_torture_init(void) | |||
1242 | nrealreaders = nreaders; | 1464 | nrealreaders = nreaders; |
1243 | else | 1465 | else |
1244 | nrealreaders = 2 * num_online_cpus(); | 1466 | nrealreaders = 2 * num_online_cpus(); |
1245 | rcu_torture_print_module_parms("Start of test"); | 1467 | rcu_torture_print_module_parms(cur_ops, "Start of test"); |
1246 | fullstop = FULLSTOP_DONTSTOP; | 1468 | fullstop = FULLSTOP_DONTSTOP; |
1247 | 1469 | ||
1248 | /* Set up the freelist. */ | 1470 | /* Set up the freelist. */ |
@@ -1263,6 +1485,12 @@ rcu_torture_init(void) | |||
1263 | atomic_set(&n_rcu_torture_free, 0); | 1485 | atomic_set(&n_rcu_torture_free, 0); |
1264 | atomic_set(&n_rcu_torture_mberror, 0); | 1486 | atomic_set(&n_rcu_torture_mberror, 0); |
1265 | atomic_set(&n_rcu_torture_error, 0); | 1487 | atomic_set(&n_rcu_torture_error, 0); |
1488 | n_rcu_torture_boost_ktrerror = 0; | ||
1489 | n_rcu_torture_boost_rterror = 0; | ||
1490 | n_rcu_torture_boost_allocerror = 0; | ||
1491 | n_rcu_torture_boost_afferror = 0; | ||
1492 | n_rcu_torture_boost_failure = 0; | ||
1493 | n_rcu_torture_boosts = 0; | ||
1266 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) | 1494 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) |
1267 | atomic_set(&rcu_torture_wcount[i], 0); | 1495 | atomic_set(&rcu_torture_wcount[i], 0); |
1268 | for_each_possible_cpu(cpu) { | 1496 | for_each_possible_cpu(cpu) { |
@@ -1376,7 +1604,27 @@ rcu_torture_init(void) | |||
1376 | goto unwind; | 1604 | goto unwind; |
1377 | } | 1605 | } |
1378 | } | 1606 | } |
1379 | register_reboot_notifier(&rcutorture_nb); | 1607 | if (test_boost_interval < 1) |
1608 | test_boost_interval = 1; | ||
1609 | if (test_boost_duration < 2) | ||
1610 | test_boost_duration = 2; | ||
1611 | if ((test_boost == 1 && cur_ops->can_boost) || | ||
1612 | test_boost == 2) { | ||
1613 | int retval; | ||
1614 | |||
1615 | boost_starttime = jiffies + test_boost_interval * HZ; | ||
1616 | register_cpu_notifier(&rcutorture_cpu_nb); | ||
1617 | for_each_possible_cpu(i) { | ||
1618 | if (cpu_is_offline(i)) | ||
1619 | continue; /* Heuristic: CPU can go offline. */ | ||
1620 | retval = rcutorture_booster_init(i); | ||
1621 | if (retval < 0) { | ||
1622 | firsterr = retval; | ||
1623 | goto unwind; | ||
1624 | } | ||
1625 | } | ||
1626 | } | ||
1627 | register_reboot_notifier(&rcutorture_shutdown_nb); | ||
1380 | mutex_unlock(&fullstop_mutex); | 1628 | mutex_unlock(&fullstop_mutex); |
1381 | return 0; | 1629 | return 0; |
1382 | 1630 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ccdc04c47981..d0ddfea6579d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
67 | .gpnum = -300, \ | 67 | .gpnum = -300, \ |
68 | .completed = -300, \ | 68 | .completed = -300, \ |
69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ |
70 | .orphan_cbs_list = NULL, \ | ||
71 | .orphan_cbs_tail = &structname.orphan_cbs_list, \ | ||
72 | .orphan_qlen = 0, \ | ||
73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 70 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ |
74 | .n_force_qs = 0, \ | 71 | .n_force_qs = 0, \ |
75 | .n_force_qs_ngp = 0, \ | 72 | .n_force_qs_ngp = 0, \ |
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void) | |||
620 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) | 617 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) |
621 | { | 618 | { |
622 | if (rdp->gpnum != rnp->gpnum) { | 619 | if (rdp->gpnum != rnp->gpnum) { |
623 | rdp->qs_pending = 1; | 620 | /* |
624 | rdp->passed_quiesc = 0; | 621 | * If the current grace period is waiting for this CPU, |
622 | * set up to detect a quiescent state, otherwise don't | ||
623 | * go looking for one. | ||
624 | */ | ||
625 | rdp->gpnum = rnp->gpnum; | 625 | rdp->gpnum = rnp->gpnum; |
626 | if (rnp->qsmask & rdp->grpmask) { | ||
627 | rdp->qs_pending = 1; | ||
628 | rdp->passed_quiesc = 0; | ||
629 | } else | ||
630 | rdp->qs_pending = 0; | ||
626 | } | 631 | } |
627 | } | 632 | } |
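In the new test, rdp->grpmask is this CPU's bit within its leaf rcu_node's masks, so "rnp->qsmask & rdp->grpmask" asks whether the newly noticed grace period is still waiting on this CPU at all. Schematically (values invented for illustration):

    unsigned long qsmask  = 0x5;      /* leaf still needs QSes from CPUs 0 and 2 */
    unsigned long grpmask = 1UL << 1; /* this CPU is bit 1 within its group */
    /* (qsmask & grpmask) == 0, so qs_pending stays 0 for this grace period */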
628 | 633 | ||
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
681 | 686 | ||
682 | /* Remember that we saw this grace-period completion. */ | 687 | /* Remember that we saw this grace-period completion. */ |
683 | rdp->completed = rnp->completed; | 688 | rdp->completed = rnp->completed; |
689 | |||
690 | /* | ||
691 | * If we were in an extended quiescent state, we may have | ||
692 | * missed some grace periods that other CPUs handled on | ||
693 | * our behalf. Catch up with this state to avoid noting | ||
694 | * spurious new grace periods. If another grace period | ||
695 | * has started, then rnp->gpnum will have advanced, so | ||
696 | * we will detect this later on. | ||
697 | */ | ||
698 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) | ||
699 | rdp->gpnum = rdp->completed; | ||
700 | |||
701 | /* | ||
702 | * If RCU does not need a quiescent state from this CPU, | ||
703 | * then make sure that this CPU doesn't go looking for one. | ||
704 | */ | ||
705 | if ((rnp->qsmask & rdp->grpmask) == 0) | ||
706 | rdp->qs_pending = 0; | ||
684 | } | 707 | } |
685 | } | 708 | } |
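ULONG_CMP_LT() is RCU's modular comparison for counters that can wrap; at this point in time it was defined along the following lines (an assumption here; the authoritative definitions live in rcupdate.h):

    #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
    #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (b) - (a))

Concretely: a CPU that slept in dyntick-idle while the rest of the system retired grace periods 101 through 105 on its behalf can wake with rdp->gpnum == 100 but rdp->completed == 105; snapping gpnum forward keeps it from treating those five already-finished grace periods as newly started ones.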
686 | 709 | ||
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
984 | #ifdef CONFIG_HOTPLUG_CPU | 1007 | #ifdef CONFIG_HOTPLUG_CPU |
985 | 1008 | ||
986 | /* | 1009 | /* |
987 | * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the | 1010 | * Move a dying CPU's RCU callbacks to an online CPU's callback list. |
988 | * specified flavor of RCU. The callbacks will be adopted by the next | 1011 | * Synchronization is not required because this function executes |
989 | * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever | 1012 | * in stop_machine() context. |
990 | * comes first. Because this is invoked from the CPU_DYING notifier, | ||
991 | * irqs are already disabled. | ||
992 | */ | 1013 | */ |
993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 1014 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
994 | { | 1015 | { |
995 | int i; | 1016 | int i; |
1017 | /* The dying CPU has already been cleared from cpu_online_mask. */ | ||
1018 | int receive_cpu = cpumask_any(cpu_online_mask); | ||
996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1019 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
1020 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | ||
997 | 1021 | ||
998 | if (rdp->nxtlist == NULL) | 1022 | if (rdp->nxtlist == NULL) |
999 | return; /* irqs disabled, so comparison is stable. */ | 1023 | return; /* irqs disabled, so comparison is stable. */ |
1000 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1024 | |
1001 | *rsp->orphan_cbs_tail = rdp->nxtlist; | 1025 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; |
1002 | rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; | 1026 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
1027 | receive_rdp->qlen += rdp->qlen; | ||
1028 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
1029 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1030 | |||
1003 | rdp->nxtlist = NULL; | 1031 | rdp->nxtlist = NULL; |
1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1032 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1005 | rdp->nxttail[i] = &rdp->nxtlist; | 1033 | rdp->nxttail[i] = &rdp->nxtlist; |
1006 | rsp->orphan_qlen += rdp->qlen; | ||
1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1008 | rdp->qlen = 0; | 1034 | rdp->qlen = 0; |
1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | ||
1010 | } | ||
1011 | |||
1012 | /* | ||
1013 | * Adopt previously orphaned RCU callbacks. | ||
1014 | */ | ||
1015 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1016 | { | ||
1017 | unsigned long flags; | ||
1018 | struct rcu_data *rdp; | ||
1019 | |||
1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
1021 | rdp = this_cpu_ptr(rsp->rda); | ||
1022 | if (rsp->orphan_cbs_list == NULL) { | ||
1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
1024 | return; | ||
1025 | } | ||
1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | ||
1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | ||
1028 | rdp->qlen += rsp->orphan_qlen; | ||
1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
1030 | rsp->orphan_cbs_list = NULL; | ||
1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | ||
1032 | rsp->orphan_qlen = 0; | ||
1033 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
1034 | } | 1035 | } |
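The replacement function is a whole-list splice executed in stop_machine() context; in terms of the cblist model sketched earlier for call_rcu(), the operation is:

    static void splice_all(struct cblist *from, struct cblist *to)
    {
        if (from->head == NULL)
            return;               /* nothing to donate */
        *to->tail = from->head;   /* append donor's entire list */
        to->tail  = from->tail;   /* adopt donor's tail pointer */
        from->head = NULL;
        from->tail = &from->head; /* donor list is now empty */
    }

The real code additionally re-points all RCU_NEXT_SIZE segment tails at the emptied list, since ->nxttail[] is an array of segment boundaries rather than a single tail pointer.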
1035 | 1036 | ||
1036 | /* | 1037 | /* |
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1082 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1082 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1083 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1083 | rcu_report_exp_rnp(rsp, rnp); | 1084 | rcu_report_exp_rnp(rsp, rnp); |
1084 | |||
1085 | rcu_adopt_orphan_cbs(rsp); | ||
1086 | } | 1085 | } |
1087 | 1086 | ||
1088 | /* | 1087 | /* |
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu) | |||
1100 | 1099 | ||
1101 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1100 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1102 | 1101 | ||
1103 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 1102 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
1104 | { | ||
1105 | } | ||
1106 | |||
1107 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1108 | { | 1103 | { |
1109 | } | 1104 | } |
1110 | 1105 | ||
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1440 | */ | 1435 | */ |
1441 | local_irq_save(flags); | 1436 | local_irq_save(flags); |
1442 | rdp = this_cpu_ptr(rsp->rda); | 1437 | rdp = this_cpu_ptr(rsp->rda); |
1443 | rcu_process_gp_end(rsp, rdp); | ||
1444 | check_for_new_grace_period(rsp, rdp); | ||
1445 | 1438 | ||
1446 | /* Add the callback to our list. */ | 1439 | /* Add the callback to our list. */ |
1447 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1440 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
1448 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1441 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1449 | 1442 | ||
1450 | /* Start a new grace period if one not already started. */ | ||
1451 | if (!rcu_gp_in_progress(rsp)) { | ||
1452 | unsigned long nestflag; | ||
1453 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
1454 | |||
1455 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
1456 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | ||
1457 | } | ||
1458 | |||
1459 | /* | 1443 | /* |
1460 | * Force the grace period if too many callbacks or too long waiting. | 1444 | * Force the grace period if too many callbacks or too long waiting. |
1461 | * Enforce hysteresis, and don't invoke force_quiescent_state() | 1445 | * Enforce hysteresis, and don't invoke force_quiescent_state() |
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1464 | * is the only one waiting for a grace period to complete. | 1448 | * is the only one waiting for a grace period to complete. |
1465 | */ | 1449 | */ |
1466 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | 1450 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { |
1467 | rdp->blimit = LONG_MAX; | 1451 | |
1468 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | 1452 | /* Are we ignoring a completed grace period? */ |
1469 | *rdp->nxttail[RCU_DONE_TAIL] != head) | 1453 | rcu_process_gp_end(rsp, rdp); |
1470 | force_quiescent_state(rsp, 0); | 1454 | check_for_new_grace_period(rsp, rdp); |
1471 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1455 | |
1472 | rdp->qlen_last_fqs_check = rdp->qlen; | 1456 | /* Start a new grace period if one not already started. */ |
1457 | if (!rcu_gp_in_progress(rsp)) { | ||
1458 | unsigned long nestflag; | ||
1459 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
1460 | |||
1461 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
1462 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | ||
1463 | } else { | ||
1464 | /* Give the grace period a kick. */ | ||
1465 | rdp->blimit = LONG_MAX; | ||
1466 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
1467 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
1468 | force_quiescent_state(rsp, 0); | ||
1469 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1470 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1471 | } | ||
1473 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | 1472 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) |
1474 | force_quiescent_state(rsp, 1); | 1473 | force_quiescent_state(rsp, 1); |
1475 | local_irq_restore(flags); | 1474 | local_irq_restore(flags); |
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
1699 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU | 1698 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU |
1700 | * might complete its grace period before all of the other CPUs | 1699 | * might complete its grace period before all of the other CPUs |
1701 | * did their increment, causing this function to return too | 1700 | * did their increment, causing this function to return too |
1702 | * early. | 1701 | * early. Note that on_each_cpu() disables irqs, which prevents |
1702 | * any CPUs from coming online or going offline until each online | ||
1703 | * CPU has queued its RCU-barrier callback. | ||
1703 | */ | 1704 | */ |
1704 | atomic_set(&rcu_barrier_cpu_count, 1); | 1705 | atomic_set(&rcu_barrier_cpu_count, 1); |
1705 | preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ | ||
1706 | rcu_adopt_orphan_cbs(rsp); | ||
1707 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 1706 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); |
1708 | preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ | ||
1709 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 1707 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
1710 | complete(&rcu_barrier_completion); | 1708 | complete(&rcu_barrier_completion); |
1711 | wait_for_completion(&rcu_barrier_completion); | 1709 | wait_for_completion(&rcu_barrier_completion); |
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1831 | case CPU_DYING: | 1829 | case CPU_DYING: |
1832 | case CPU_DYING_FROZEN: | 1830 | case CPU_DYING_FROZEN: |
1833 | /* | 1831 | /* |
1834 | * preempt_disable() in _rcu_barrier() prevents stop_machine(), | 1832 | * The whole machine is "stopped" except this CPU, so we can |
1835 | * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" | 1833 | * touch any data without introducing corruption. We send the |
1836 | * returns, all online cpus have queued rcu_barrier_func(). | 1834 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
1837 | * The dying CPU clears its cpu_online_mask bit and | ||
1838 | * moves all of its RCU callbacks to ->orphan_cbs_list | ||
1839 | * in the context of stop_machine(), so subsequent calls | ||
1840 | * to _rcu_barrier() will adopt these callbacks and only | ||
1841 | * then queue rcu_barrier_func() on all remaining CPUs. | ||
1842 | */ | 1835 | */ |
1843 | rcu_send_cbs_to_orphanage(&rcu_bh_state); | 1836 | rcu_send_cbs_to_online(&rcu_bh_state); |
1844 | rcu_send_cbs_to_orphanage(&rcu_sched_state); | 1837 | rcu_send_cbs_to_online(&rcu_sched_state); |
1845 | rcu_preempt_send_cbs_to_orphanage(); | 1838 | rcu_preempt_send_cbs_to_online(); |
1846 | break; | 1839 | break; |
1847 | case CPU_DEAD: | 1840 | case CPU_DEAD: |
1848 | case CPU_DEAD_FROZEN: | 1841 | case CPU_DEAD_FROZEN: |
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
1880 | { | 1873 | { |
1881 | int i; | 1874 | int i; |
1882 | 1875 | ||
1883 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) | 1876 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
1884 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 1877 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
1878 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | ||
1885 | } | 1879 | } |
1886 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 1880 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
1887 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 1881 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 91d4170c5c13..e8f057e44e3e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -31,46 +31,51 @@ | |||
31 | /* | 31 | /* |
32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. | 32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. |
33 | * In theory, it should be possible to add more levels straightforwardly. | 33 | * In theory, it should be possible to add more levels straightforwardly. |
34 | * In practice, this has not been tested, so there is probably some | 34 | * In practice, this did work well going from three levels to four. |
35 | * bug somewhere. | 35 | * Of course, your mileage may vary. |
36 | */ | 36 | */ |
37 | #define MAX_RCU_LVLS 4 | 37 | #define MAX_RCU_LVLS 4 |
38 | #define RCU_FANOUT (CONFIG_RCU_FANOUT) | 38 | #if CONFIG_RCU_FANOUT > 16 |
39 | #define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) | 39 | #define RCU_FANOUT_LEAF 16 |
40 | #define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) | 40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ |
41 | #define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) | 41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) |
42 | 42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ | |
43 | #if NR_CPUS <= RCU_FANOUT | 43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) |
44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) | ||
45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) | ||
46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | ||
47 | |||
48 | #if NR_CPUS <= RCU_FANOUT_1 | ||
44 | # define NUM_RCU_LVLS 1 | 49 | # define NUM_RCU_LVLS 1 |
45 | # define NUM_RCU_LVL_0 1 | 50 | # define NUM_RCU_LVL_0 1 |
46 | # define NUM_RCU_LVL_1 (NR_CPUS) | 51 | # define NUM_RCU_LVL_1 (NR_CPUS) |
47 | # define NUM_RCU_LVL_2 0 | 52 | # define NUM_RCU_LVL_2 0 |
48 | # define NUM_RCU_LVL_3 0 | 53 | # define NUM_RCU_LVL_3 0 |
49 | # define NUM_RCU_LVL_4 0 | 54 | # define NUM_RCU_LVL_4 0 |
50 | #elif NR_CPUS <= RCU_FANOUT_SQ | 55 | #elif NR_CPUS <= RCU_FANOUT_2 |
51 | # define NUM_RCU_LVLS 2 | 56 | # define NUM_RCU_LVLS 2 |
52 | # define NUM_RCU_LVL_0 1 | 57 | # define NUM_RCU_LVL_0 1 |
53 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 58 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
54 | # define NUM_RCU_LVL_2 (NR_CPUS) | 59 | # define NUM_RCU_LVL_2 (NR_CPUS) |
55 | # define NUM_RCU_LVL_3 0 | 60 | # define NUM_RCU_LVL_3 0 |
56 | # define NUM_RCU_LVL_4 0 | 61 | # define NUM_RCU_LVL_4 0 |
57 | #elif NR_CPUS <= RCU_FANOUT_CUBE | 62 | #elif NR_CPUS <= RCU_FANOUT_3 |
58 | # define NUM_RCU_LVLS 3 | 63 | # define NUM_RCU_LVLS 3 |
59 | # define NUM_RCU_LVL_0 1 | 64 | # define NUM_RCU_LVL_0 1 |
60 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) | 65 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
61 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 66 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
62 | # define NUM_RCU_LVL_3 NR_CPUS | 67 | # define NUM_RCU_LVL_3 (NR_CPUS) |
63 | # define NUM_RCU_LVL_4 0 | 68 | # define NUM_RCU_LVL_4 0 |
64 | #elif NR_CPUS <= RCU_FANOUT_FOURTH | 69 | #elif NR_CPUS <= RCU_FANOUT_4 |
65 | # define NUM_RCU_LVLS 4 | 70 | # define NUM_RCU_LVLS 4 |
66 | # define NUM_RCU_LVL_0 1 | 71 | # define NUM_RCU_LVL_0 1 |
67 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) | 72 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) |
68 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) | 73 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
69 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 74 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
70 | # define NUM_RCU_LVL_4 NR_CPUS | 75 | # define NUM_RCU_LVL_4 (NR_CPUS) |
71 | #else | 76 | #else |
72 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" | 77 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" |
73 | #endif /* #if (NR_CPUS) <= RCU_FANOUT */ | 78 | #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ |
74 | 79 | ||
75 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) | 80 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) |
76 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) | 81 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) |
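A worked instance of the new leaf capping, assuming CONFIG_RCU_FANOUT=64 and NR_CPUS=4096 (values chosen purely for illustration):

    RCU_FANOUT_LEAF = 16                      (64 > 16, so the leaf level is capped)
    RCU_FANOUT_1 = 16,  RCU_FANOUT_2 = 1024,  RCU_FANOUT_3 = 65536
    NR_CPUS <= RCU_FANOUT_2? no; <= RCU_FANOUT_3? yes  =>  NUM_RCU_LVLS = 3
    NUM_RCU_LVL_1 = DIV_ROUND_UP(4096, 1024) = 4
    NUM_RCU_LVL_2 = DIV_ROUND_UP(4096, 16)   = 256
    NUM_RCU_NODES = (1 + 4 + 256 + 4096) - 4096 = 261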
@@ -203,8 +208,8 @@ struct rcu_data { | |||
203 | long qlen_last_fqs_check; | 208 | long qlen_last_fqs_check; |
204 | /* qlen at last check for QS forcing */ | 209 | /* qlen at last check for QS forcing */ |
205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | 210 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ |
206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | 211 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ |
207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | 212 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ |
208 | unsigned long n_force_qs_snap; | 213 | unsigned long n_force_qs_snap; |
209 | /* did other CPU force QS recently? */ | 214 | /* did other CPU force QS recently? */ |
210 | long blimit; /* Upper limit on a processed batch */ | 215 | long blimit; /* Upper limit on a processed batch */ |
@@ -309,15 +314,7 @@ struct rcu_state { | |||
309 | /* End of fields guarded by root rcu_node's lock. */ | 314 | /* End of fields guarded by root rcu_node's lock. */ |
310 | 315 | ||
311 | raw_spinlock_t onofflock; /* exclude on/offline and */ | 316 | raw_spinlock_t onofflock; /* exclude on/offline and */ |
312 | /* starting new GP. Also */ | 317 | /* starting new GP. */ |
313 | /* protects the following */ | ||
314 | /* orphan_cbs fields. */ | ||
315 | struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */ | ||
316 | /* orphaned by all CPUs in */ | ||
317 | /* a given leaf rcu_node */ | ||
318 | /* going offline. */ | ||
319 | struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ | ||
320 | long orphan_qlen; /* Number of orphaned cbs. */ | ||
321 | raw_spinlock_t fqslock; /* Only one task forcing */ | 318 | raw_spinlock_t fqslock; /* Only one task forcing */ |
322 | /* quiescent states. */ | 319 | /* quiescent states. */ |
323 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 320 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
@@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | |||
390 | static int rcu_preempt_pending(int cpu); | 387 | static int rcu_preempt_pending(int cpu); |
391 | static int rcu_preempt_needs_cpu(int cpu); | 388 | static int rcu_preempt_needs_cpu(int cpu); |
392 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 389 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
393 | static void rcu_preempt_send_cbs_to_orphanage(void); | 390 | static void rcu_preempt_send_cbs_to_online(void); |
394 | static void __init __rcu_init_preempt(void); | 391 | static void __init __rcu_init_preempt(void); |
395 | static void rcu_needs_cpu_flush(void); | 392 | static void rcu_needs_cpu_flush(void); |
396 | 393 | ||
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 71a4147473f9..a3638710dc67 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -25,6 +25,7 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/stop_machine.h> | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * Check the RCU kernel configuration parameters and print informative | 31 | * Check the RCU kernel configuration parameters and print informative |
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
773 | } | 774 | } |
774 | 775 | ||
775 | /* | 776 | /* |
776 | * Move preemptable RCU's callbacks to ->orphan_cbs_list. | 777 | * Move preemptable RCU's callbacks from the dying CPU to an online CPU. |
777 | */ | 778 | */ |
778 | static void rcu_preempt_send_cbs_to_orphanage(void) | 779 | static void rcu_preempt_send_cbs_to_online(void) |
779 | { | 780 | { |
780 | rcu_send_cbs_to_orphanage(&rcu_preempt_state); | 781 | rcu_send_cbs_to_online(&rcu_preempt_state); |
781 | } | 782 | } |
782 | 783 | ||
783 | /* | 784 | /* |
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
1001 | /* | 1002 | /* |
1002 | * Because there is no preemptable RCU, there are no callbacks to move. | 1003 | * Because there is no preemptable RCU, there are no callbacks to move. |
1003 | */ | 1004 | */ |
1004 | static void rcu_preempt_send_cbs_to_orphanage(void) | 1005 | static void rcu_preempt_send_cbs_to_online(void) |
1005 | { | 1006 | { |
1006 | } | 1007 | } |
1007 | 1008 | ||
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void) | |||
1014 | 1015 | ||
1015 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1016 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1016 | 1017 | ||
1018 | #ifndef CONFIG_SMP | ||
1019 | |||
1020 | void synchronize_sched_expedited(void) | ||
1021 | { | ||
1022 | cond_resched(); | ||
1023 | } | ||
1024 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
1025 | |||
1026 | #else /* #ifndef CONFIG_SMP */ | ||
1027 | |||
1028 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
1029 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
1030 | |||
1031 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
1032 | { | ||
1033 | /* | ||
1034 | * There must be a full memory barrier on each affected CPU | ||
1035 | * between the time that try_stop_cpus() is called and the | ||
1036 | * time that it returns. | ||
1037 | * | ||
1038 | * In the current initial implementation of cpu_stop, the | ||
1039 | * above condition is already met when the control reaches | ||
1040 | * this point and the following smp_mb() is not strictly | ||
1041 | * necessary. Do smp_mb() anyway for documentation and | ||
1042 | * robustness against future implementation changes. | ||
1043 | */ | ||
1044 | smp_mb(); /* See above comment block. */ | ||
1045 | return 0; | ||
1046 | } | ||
1047 | |||
1048 | /* | ||
1049 | * Wait for an rcu-sched grace period to elapse, but use a "big hammer" | ||
1050 | * approach to force the grace period to end quickly. This consumes | ||
1051 | * significant time on all CPUs, and is thus not recommended for | ||
1052 | * any sort of common-case code. | ||
1053 | * | ||
1054 | * Note that it is illegal to call this function while holding any | ||
1055 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
1056 | * observe this restriction will result in deadlock. | ||
1057 | * | ||
1058 | * This implementation can be thought of as an application of ticket | ||
1059 | * locking to RCU, with sync_sched_expedited_started and | ||
1060 | * sync_sched_expedited_done taking on the roles of the halves | ||
1061 | * of the ticket-lock word. Each task atomically increments | ||
1062 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
1063 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
1064 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
1065 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
1066 | * update sync_sched_expedited_done to match our snapshot -- but | ||
1067 | * only if someone else has not already advanced past our snapshot. | ||
1068 | * | ||
1069 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
1070 | * of sync_sched_expedited_done. If it has advanced past our | ||
1071 | * initial snapshot, then someone else must have forced a grace period | ||
1072 | * some time after we took our snapshot. In this case, our work is | ||
1073 | * done for us, and we can simply return. Otherwise, we try again, | ||
1074 | * but keep our initial snapshot for purposes of checking for someone | ||
1075 | * doing our work for us. | ||
1076 | * | ||
1077 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
1078 | */ | ||
1079 | void synchronize_sched_expedited(void) | ||
1080 | { | ||
1081 | int firstsnap, s, snap, trycount = 0; | ||
1082 | |||
1083 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
1084 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
1085 | get_online_cpus(); | ||
1086 | |||
1087 | /* | ||
1088 | * Each pass through the following loop attempts to force a | ||
1089 | * context switch on each CPU. | ||
1090 | */ | ||
1091 | while (try_stop_cpus(cpu_online_mask, | ||
1092 | synchronize_sched_expedited_cpu_stop, | ||
1093 | NULL) == -EAGAIN) { | ||
1094 | put_online_cpus(); | ||
1095 | |||
1096 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
1097 | if (trycount++ < 10) | ||
1098 | udelay(trycount * num_online_cpus()); | ||
1099 | else { | ||
1100 | synchronize_sched(); | ||
1101 | return; | ||
1102 | } | ||
1103 | |||
1104 | /* Check to see if someone else did our work for us. */ | ||
1105 | s = atomic_read(&sync_sched_expedited_done); | ||
1106 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
1107 | smp_mb(); /* ensure test happens before caller kfree */ | ||
1108 | return; | ||
1109 | } | ||
1110 | |||
1111 | /* | ||
1112 | * Refetching sync_sched_expedited_started allows later | ||
1113 | * callers to piggyback on our grace period. We subtract | ||
1114 | * 1 to get the same token that the last incrementer got. | ||
1115 | * We retry after they started, so our grace period works | ||
1116 | * for them, and they started after our first try, so their | ||
1117 | * grace period works for us. | ||
1118 | */ | ||
1119 | get_online_cpus(); | ||
1120 | snap = atomic_read(&sync_sched_expedited_started) - 1; | ||
1121 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
1122 | } | ||
1123 | |||
1124 | /* | ||
1125 | * Everyone up to our most recent fetch is covered by our grace | ||
1126 | * period. Update the counter, but only if our work is still | ||
1127 | * relevant -- which it won't be if someone who started later | ||
1128 | * than we did beat us to the punch. | ||
1129 | */ | ||
1130 | do { | ||
1131 | s = atomic_read(&sync_sched_expedited_done); | ||
1132 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
1133 | smp_mb(); /* ensure test happens before caller kfree */ | ||
1134 | break; | ||
1135 | } | ||
1136 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
1137 | |||
1138 | put_online_cpus(); | ||
1139 | } | ||
1140 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
1141 | |||
1142 | #endif /* #else #ifndef CONFIG_SMP */ | ||
1143 | |||
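
The ticket analogy in the comment block above is easier to see in isolation. Below is a minimal user-space sketch of the started/done counter pattern, assuming C11 atomics; the names (started, done, expedited, expensive_global_op, covered) are hypothetical stand-ins rather than kernel APIs, and covered() mirrors the wraparound-safe comparison that UINT_CMP_GE performs.

#include <stdatomic.h>

static atomic_int started = ATOMIC_VAR_INIT(0);	/* tickets handed out on entry */
static atomic_int done = ATOMIC_VAR_INIT(0);	/* highest ticket known covered */

static void expensive_global_op(void)
{
	/* Stands in for try_stop_cpus() forcing a context switch
	 * on every CPU. */
}

static int covered(int s, int snap)
{
	/* Wraparound-safe "s >= snap", like the kernel's UINT_CMP_GE. */
	return (int)((unsigned)s - (unsigned)snap) >= 0;
}

static void expedited(void)
{
	/* Take a ticket; the atomic RMW gives a unique snapshot. */
	int snap = atomic_fetch_add(&started, 1) + 1;
	int s;

	/* If done already reached our ticket, someone finished the
	 * global op after we arrived, so their work covers us. */
	if (covered(atomic_load(&done), snap))
		return;

	expensive_global_op();

	/* Advance done to our ticket, but never move it backwards:
	 * a failed CAS reloads s, so we recheck and retry. */
	s = atomic_load(&done);
	while (!covered(s, snap) &&
	       !atomic_compare_exchange_weak(&done, &s, snap))
		;
}

As in the kernel version, a caller that loses the race simply piggybacks on the winner's completed operation instead of repeating it.
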
1017 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | 1144 | #if !defined(CONFIG_RCU_FAST_NO_HZ) |
1018 | 1145 | ||
1019 | /* | 1146 | /* |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d15430b9d122..c8e97853b970 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
166 | 166 | ||
167 | gpnum = rsp->gpnum; | 167 | gpnum = rsp->gpnum; |
168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", | 169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", |
170 | rsp->completed, gpnum, rsp->signaled, | 170 | rsp->completed, gpnum, rsp->signaled, |
171 | (long)(rsp->jiffies_force_qs - jiffies), | 171 | (long)(rsp->jiffies_force_qs - jiffies), |
172 | (int)(jiffies & 0xffff), | 172 | (int)(jiffies & 0xffff), |
173 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 173 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
174 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 174 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
175 | rsp->n_force_qs_lh, rsp->orphan_qlen); | 175 | rsp->n_force_qs_lh); |
176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { | 176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { |
177 | if (rnp->level != level) { | 177 | if (rnp->level != level) { |
178 | seq_puts(m, "\n"); | 178 | seq_puts(m, "\n"); |
@@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = { | |||
300 | 300 | ||
301 | static struct dentry *rcudir; | 301 | static struct dentry *rcudir; |
302 | 302 | ||
303 | static int __init rcuclassic_trace_init(void) | 303 | static int __init rcutree_trace_init(void) |
304 | { | 304 | { |
305 | struct dentry *retval; | 305 | struct dentry *retval; |
306 | 306 | ||
@@ -337,14 +337,14 @@ free_out: | |||
337 | return 1; | 337 | return 1; |
338 | } | 338 | } |
339 | 339 | ||
340 | static void __exit rcuclassic_trace_cleanup(void) | 340 | static void __exit rcutree_trace_cleanup(void) |
341 | { | 341 | { |
342 | debugfs_remove_recursive(rcudir); | 342 | debugfs_remove_recursive(rcudir); |
343 | } | 343 | } |
344 | 344 | ||
345 | 345 | ||
346 | module_init(rcuclassic_trace_init); | 346 | module_init(rcutree_trace_init); |
347 | module_exit(rcuclassic_trace_cleanup); | 347 | module_exit(rcutree_trace_cleanup); |
348 | 348 | ||
349 | MODULE_AUTHOR("Paul E. McKenney"); | 349 | MODULE_AUTHOR("Paul E. McKenney"); |
350 | MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); | 350 | MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); |
diff --git a/kernel/sched.c b/kernel/sched.c index c68cead94dd7..04949089e760 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -75,9 +75,11 @@ | |||
75 | 75 | ||
76 | #include <asm/tlb.h> | 76 | #include <asm/tlb.h> |
77 | #include <asm/irq_regs.h> | 77 | #include <asm/irq_regs.h> |
78 | #include <asm/mutex.h> | ||
78 | 79 | ||
79 | #include "sched_cpupri.h" | 80 | #include "sched_cpupri.h" |
80 | #include "workqueue_sched.h" | 81 | #include "workqueue_sched.h" |
82 | #include "sched_autogroup.h" | ||
81 | 83 | ||
82 | #define CREATE_TRACE_POINTS | 84 | #define CREATE_TRACE_POINTS |
83 | #include <trace/events/sched.h> | 85 | #include <trace/events/sched.h> |
@@ -253,6 +255,8 @@ struct task_group { | |||
253 | /* runqueue "owned" by this group on each cpu */ | 255 | /* runqueue "owned" by this group on each cpu */ |
254 | struct cfs_rq **cfs_rq; | 256 | struct cfs_rq **cfs_rq; |
255 | unsigned long shares; | 257 | unsigned long shares; |
258 | |||
259 | atomic_t load_weight; | ||
256 | #endif | 260 | #endif |
257 | 261 | ||
258 | #ifdef CONFIG_RT_GROUP_SCHED | 262 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -268,24 +272,19 @@ struct task_group { | |||
268 | struct task_group *parent; | 272 | struct task_group *parent; |
269 | struct list_head siblings; | 273 | struct list_head siblings; |
270 | struct list_head children; | 274 | struct list_head children; |
275 | |||
276 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
277 | struct autogroup *autogroup; | ||
278 | #endif | ||
271 | }; | 279 | }; |
272 | 280 | ||
273 | #define root_task_group init_task_group | 281 | #define root_task_group init_task_group |
274 | 282 | ||
275 | /* task_group_lock serializes add/remove of task groups and also changes to | 283 | /* task_group_lock serializes the addition/removal of task groups */ |
276 | * a task group's cpu shares. | ||
277 | */ | ||
278 | static DEFINE_SPINLOCK(task_group_lock); | 284 | static DEFINE_SPINLOCK(task_group_lock); |
279 | 285 | ||
280 | #ifdef CONFIG_FAIR_GROUP_SCHED | 286 | #ifdef CONFIG_FAIR_GROUP_SCHED |
281 | 287 | ||
282 | #ifdef CONFIG_SMP | ||
283 | static int root_task_group_empty(void) | ||
284 | { | ||
285 | return list_empty(&root_task_group.children); | ||
286 | } | ||
287 | #endif | ||
288 | |||
289 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 288 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
290 | 289 | ||
291 | /* | 290 | /* |
@@ -342,6 +341,7 @@ struct cfs_rq { | |||
342 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This | 341 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This |
343 | * list is used during load balance. | 342 | * list is used during load balance. |
344 | */ | 343 | */ |
344 | int on_list; | ||
345 | struct list_head leaf_cfs_rq_list; | 345 | struct list_head leaf_cfs_rq_list; |
346 | struct task_group *tg; /* group that "owns" this runqueue */ | 346 | struct task_group *tg; /* group that "owns" this runqueue */ |
347 | 347 | ||
@@ -360,14 +360,17 @@ struct cfs_rq { | |||
360 | unsigned long h_load; | 360 | unsigned long h_load; |
361 | 361 | ||
362 | /* | 362 | /* |
363 | * this cpu's part of tg->shares | 363 | * Maintaining per-cpu shares distribution for group scheduling |
364 | * | ||
365 | * load_stamp is the last time we updated the load average | ||
366 | * load_last is the last time we updated the load average and saw load | ||
367 | * load_unacc_exec_time is currently unaccounted execution time | ||
364 | */ | 368 | */ |
365 | unsigned long shares; | 369 | u64 load_avg; |
370 | u64 load_period; | ||
371 | u64 load_stamp, load_last, load_unacc_exec_time; | ||
366 | 372 | ||
367 | /* | 373 | unsigned long load_contribution; |
368 | * load.weight at the time we set shares | ||
369 | */ | ||
370 | unsigned long rq_weight; | ||
371 | #endif | 374 | #endif |
372 | #endif | 375 | #endif |
373 | }; | 376 | }; |
@@ -605,11 +608,14 @@ static inline int cpu_of(struct rq *rq) | |||
605 | */ | 608 | */ |
606 | static inline struct task_group *task_group(struct task_struct *p) | 609 | static inline struct task_group *task_group(struct task_struct *p) |
607 | { | 610 | { |
611 | struct task_group *tg; | ||
608 | struct cgroup_subsys_state *css; | 612 | struct cgroup_subsys_state *css; |
609 | 613 | ||
610 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 614 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
611 | lockdep_is_held(&task_rq(p)->lock)); | 615 | lockdep_is_held(&task_rq(p)->lock)); |
612 | return container_of(css, struct task_group, css); | 616 | tg = container_of(css, struct task_group, css); |
617 | |||
618 | return autogroup_task_group(p, tg); | ||
613 | } | 619 | } |
614 | 620 | ||
615 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 621 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
@@ -793,20 +799,6 @@ late_initcall(sched_init_debug); | |||
793 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 799 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
794 | 800 | ||
795 | /* | 801 | /* |
796 | * ratelimit for updating the group shares. | ||
797 | * default: 0.25ms | ||
798 | */ | ||
799 | unsigned int sysctl_sched_shares_ratelimit = 250000; | ||
800 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
801 | |||
802 | /* | ||
803 | * Inject some fuzzyness into changing the per-cpu group shares | ||
804 | * this avoids remote rq-locks at the expense of fairness. | ||
805 | * default: 4 | ||
806 | */ | ||
807 | unsigned int sysctl_sched_shares_thresh = 4; | ||
808 | |||
809 | /* | ||
810 | * period over which we average the RT time consumption, measured | 802 | * period over which we average the RT time consumption, measured |
811 | * in ms. | 803 | * in ms. |
812 | * | 804 | * |
@@ -1355,6 +1347,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) | |||
1355 | lw->inv_weight = 0; | 1347 | lw->inv_weight = 0; |
1356 | } | 1348 | } |
1357 | 1349 | ||
1350 | static inline void update_load_set(struct load_weight *lw, unsigned long w) | ||
1351 | { | ||
1352 | lw->weight = w; | ||
1353 | lw->inv_weight = 0; | ||
1354 | } | ||
1355 | |||
1358 | /* | 1356 | /* |
1359 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | 1357 | * To aid in avoiding the subversion of "niceness" due to uneven distribution |
1360 | * of tasks with abnormal "nice" values across CPUs the contribution that | 1358 | * of tasks with abnormal "nice" values across CPUs the contribution that |
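
The new update_load_set() zeroes inv_weight just as update_load_add() and update_load_sub() do. That field caches an inverse so the hot path can multiply instead of divide, recomputing lazily after any weight change. A simplified sketch of the idea follows; the constants and rounding differ from the kernel's real calc_delta path, so treat it as illustration only, and it assumes weight is never zero.

#define WMULT_SHIFT	32

struct load_weight {
	unsigned long weight;
	unsigned long inv_weight;	/* ~2^32 / weight, filled on demand */
};

static unsigned long long
calc_delta(unsigned long long delta, struct load_weight *lw)
{
	/* Any setter (add/sub/set) zeroes inv_weight, so the first
	 * use after a change pays for one division... */
	if (!lw->inv_weight)
		lw->inv_weight = (unsigned long)((1ULL << WMULT_SHIFT) /
						 lw->weight);

	/* ...and every later use is a multiply and a shift. */
	return (delta * lw->inv_weight) >> WMULT_SHIFT;
}
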
@@ -1543,101 +1541,6 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
1543 | 1541 | ||
1544 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1542 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1545 | 1543 | ||
1546 | static __read_mostly unsigned long __percpu *update_shares_data; | ||
1547 | |||
1548 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1549 | |||
1550 | /* | ||
1551 | * Calculate and set the cpu's group shares. | ||
1552 | */ | ||
1553 | static void update_group_shares_cpu(struct task_group *tg, int cpu, | ||
1554 | unsigned long sd_shares, | ||
1555 | unsigned long sd_rq_weight, | ||
1556 | unsigned long *usd_rq_weight) | ||
1557 | { | ||
1558 | unsigned long shares, rq_weight; | ||
1559 | int boost = 0; | ||
1560 | |||
1561 | rq_weight = usd_rq_weight[cpu]; | ||
1562 | if (!rq_weight) { | ||
1563 | boost = 1; | ||
1564 | rq_weight = NICE_0_LOAD; | ||
1565 | } | ||
1566 | |||
1567 | /* | ||
1568 | * \Sum_j shares_j * rq_weight_i | ||
1569 | * shares_i = ----------------------------- | ||
1570 | * \Sum_j rq_weight_j | ||
1571 | */ | ||
1572 | shares = (sd_shares * rq_weight) / sd_rq_weight; | ||
1573 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
1574 | |||
1575 | if (abs(shares - tg->se[cpu]->load.weight) > | ||
1576 | sysctl_sched_shares_thresh) { | ||
1577 | struct rq *rq = cpu_rq(cpu); | ||
1578 | unsigned long flags; | ||
1579 | |||
1580 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
1581 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; | ||
1582 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1583 | __set_se_shares(tg->se[cpu], shares); | ||
1584 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
1585 | } | ||
1586 | } | ||
1587 | |||
1588 | /* | ||
1589 | * Re-compute the task group's per-cpu shares over the given domain. | ||
1590 | * This needs to be done in a bottom-up fashion because the rq weight of a | ||
1591 | * parent group depends on the shares of its child groups. | ||
1592 | */ | ||
1593 | static int tg_shares_up(struct task_group *tg, void *data) | ||
1594 | { | ||
1595 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; | ||
1596 | unsigned long *usd_rq_weight; | ||
1597 | struct sched_domain *sd = data; | ||
1598 | unsigned long flags; | ||
1599 | int i; | ||
1600 | |||
1601 | if (!tg->se[0]) | ||
1602 | return 0; | ||
1603 | |||
1604 | local_irq_save(flags); | ||
1605 | usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); | ||
1606 | |||
1607 | for_each_cpu(i, sched_domain_span(sd)) { | ||
1608 | weight = tg->cfs_rq[i]->load.weight; | ||
1609 | usd_rq_weight[i] = weight; | ||
1610 | |||
1611 | rq_weight += weight; | ||
1612 | /* | ||
1613 | * If there are currently no tasks on the cpu pretend there | ||
1614 | * is one of average load so that when a new task gets to | ||
1615 | * run here it will not get delayed by group starvation. | ||
1616 | */ | ||
1617 | if (!weight) | ||
1618 | weight = NICE_0_LOAD; | ||
1619 | |||
1620 | sum_weight += weight; | ||
1621 | shares += tg->cfs_rq[i]->shares; | ||
1622 | } | ||
1623 | |||
1624 | if (!rq_weight) | ||
1625 | rq_weight = sum_weight; | ||
1626 | |||
1627 | if ((!shares && rq_weight) || shares > tg->shares) | ||
1628 | shares = tg->shares; | ||
1629 | |||
1630 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | ||
1631 | shares = tg->shares; | ||
1632 | |||
1633 | for_each_cpu(i, sched_domain_span(sd)) | ||
1634 | update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); | ||
1635 | |||
1636 | local_irq_restore(flags); | ||
1637 | |||
1638 | return 0; | ||
1639 | } | ||
1640 | |||
1641 | /* | 1544 | /* |
1642 | * Compute the cpu's hierarchical load factor for each task group. | 1545 | * Compute the cpu's hierarchical load factor for each task group. |
1643 | * This needs to be done in a top-down fashion because the load of a child | 1546 | * This needs to be done in a top-down fashion because the load of a child |
@@ -1652,7 +1555,7 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
1652 | load = cpu_rq(cpu)->load.weight; | 1555 | load = cpu_rq(cpu)->load.weight; |
1653 | } else { | 1556 | } else { |
1654 | load = tg->parent->cfs_rq[cpu]->h_load; | 1557 | load = tg->parent->cfs_rq[cpu]->h_load; |
1655 | load *= tg->cfs_rq[cpu]->shares; | 1558 | load *= tg->se[cpu]->load.weight; |
1656 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | 1559 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; |
1657 | } | 1560 | } |
1658 | 1561 | ||
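
With cfs_rq->shares gone, tg_load_down() scales the parent's hierarchical load by the group entity's actual weight on that cpu. Plugging in made-up numbers shows the shape of the computation:

/* Illustrative values only; not taken from any real workload. */
static unsigned long example_h_load(void)
{
	unsigned long parent_h_load = 2048;	/* tg->parent->cfs_rq[cpu]->h_load      */
	unsigned long se_weight     = 1024;	/* tg->se[cpu]->load.weight             */
	unsigned long parent_weight = 4096;	/* tg->parent->cfs_rq[cpu]->load.weight */

	/* 2048 * 1024 / 4097 == 511; the +1 keeps an empty parent
	 * queue from dividing by zero. */
	return parent_h_load * se_weight / (parent_weight + 1);
}
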
@@ -1661,34 +1564,11 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
1661 | return 0; | 1564 | return 0; |
1662 | } | 1565 | } |
1663 | 1566 | ||
1664 | static void update_shares(struct sched_domain *sd) | ||
1665 | { | ||
1666 | s64 elapsed; | ||
1667 | u64 now; | ||
1668 | |||
1669 | if (root_task_group_empty()) | ||
1670 | return; | ||
1671 | |||
1672 | now = local_clock(); | ||
1673 | elapsed = now - sd->last_update; | ||
1674 | |||
1675 | if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { | ||
1676 | sd->last_update = now; | ||
1677 | walk_tg_tree(tg_nop, tg_shares_up, sd); | ||
1678 | } | ||
1679 | } | ||
1680 | |||
1681 | static void update_h_load(long cpu) | 1567 | static void update_h_load(long cpu) |
1682 | { | 1568 | { |
1683 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 1569 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
1684 | } | 1570 | } |
1685 | 1571 | ||
1686 | #else | ||
1687 | |||
1688 | static inline void update_shares(struct sched_domain *sd) | ||
1689 | { | ||
1690 | } | ||
1691 | |||
1692 | #endif | 1572 | #endif |
1693 | 1573 | ||
1694 | #ifdef CONFIG_PREEMPT | 1574 | #ifdef CONFIG_PREEMPT |
@@ -1810,15 +1690,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
1810 | 1690 | ||
1811 | #endif | 1691 | #endif |
1812 | 1692 | ||
1813 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1814 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1815 | { | ||
1816 | #ifdef CONFIG_SMP | ||
1817 | cfs_rq->shares = shares; | ||
1818 | #endif | ||
1819 | } | ||
1820 | #endif | ||
1821 | |||
1822 | static void calc_load_account_idle(struct rq *this_rq); | 1693 | static void calc_load_account_idle(struct rq *this_rq); |
1823 | static void update_sysctl(void); | 1694 | static void update_sysctl(void); |
1824 | static int get_update_sysctl_factor(void); | 1695 | static int get_update_sysctl_factor(void); |
@@ -2063,6 +1934,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
2063 | #include "sched_idletask.c" | 1934 | #include "sched_idletask.c" |
2064 | #include "sched_fair.c" | 1935 | #include "sched_fair.c" |
2065 | #include "sched_rt.c" | 1936 | #include "sched_rt.c" |
1937 | #include "sched_autogroup.c" | ||
2066 | #include "sched_stoptask.c" | 1938 | #include "sched_stoptask.c" |
2067 | #ifdef CONFIG_SCHED_DEBUG | 1939 | #ifdef CONFIG_SCHED_DEBUG |
2068 | # include "sched_debug.c" | 1940 | # include "sched_debug.c" |
@@ -2255,10 +2127,8 @@ static int migration_cpu_stop(void *data); | |||
2255 | * The task's runqueue lock must be held. | 2127 | * The task's runqueue lock must be held. |
2256 | * Returns true if you have to wait for migration thread. | 2128 | * Returns true if you have to wait for migration thread. |
2257 | */ | 2129 | */ |
2258 | static bool migrate_task(struct task_struct *p, int dest_cpu) | 2130 | static bool migrate_task(struct task_struct *p, struct rq *rq) |
2259 | { | 2131 | { |
2260 | struct rq *rq = task_rq(p); | ||
2261 | |||
2262 | /* | 2132 | /* |
2263 | * If the task is not on a runqueue (and not running), then | 2133 | * If the task is not on a runqueue (and not running), then |
2264 | * the next wake-up will properly place the task. | 2134 | * the next wake-up will properly place the task. |
@@ -2438,18 +2308,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2438 | return dest_cpu; | 2308 | return dest_cpu; |
2439 | 2309 | ||
2440 | /* No more Mr. Nice Guy. */ | 2310 | /* No more Mr. Nice Guy. */ |
2441 | if (unlikely(dest_cpu >= nr_cpu_ids)) { | 2311 | dest_cpu = cpuset_cpus_allowed_fallback(p); |
2442 | dest_cpu = cpuset_cpus_allowed_fallback(p); | 2312 | /* |
2443 | /* | 2313 | * Don't tell them about moving exiting tasks or |
2444 | * Don't tell them about moving exiting tasks or | 2314 | * kernel threads (both mm NULL), since they never |
2445 | * kernel threads (both mm NULL), since they never | 2315 | * leave kernel. |
2446 | * leave kernel. | 2316 | */ |
2447 | */ | 2317 | if (p->mm && printk_ratelimit()) { |
2448 | if (p->mm && printk_ratelimit()) { | 2318 | printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", |
2449 | printk(KERN_INFO "process %d (%s) no " | 2319 | task_pid_nr(p), p->comm, cpu); |
2450 | "longer affine to cpu%d\n", | ||
2451 | task_pid_nr(p), p->comm, cpu); | ||
2452 | } | ||
2453 | } | 2320 | } |
2454 | 2321 | ||
2455 | return dest_cpu; | 2322 | return dest_cpu; |
@@ -2785,7 +2652,9 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2785 | /* Want to start with kernel preemption disabled. */ | 2652 | /* Want to start with kernel preemption disabled. */ |
2786 | task_thread_info(p)->preempt_count = 1; | 2653 | task_thread_info(p)->preempt_count = 1; |
2787 | #endif | 2654 | #endif |
2655 | #ifdef CONFIG_SMP | ||
2788 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | 2656 | plist_node_init(&p->pushable_tasks, MAX_PRIO); |
2657 | #endif | ||
2789 | 2658 | ||
2790 | put_cpu(); | 2659 | put_cpu(); |
2791 | } | 2660 | } |
@@ -3549,7 +3418,7 @@ void sched_exec(void) | |||
3549 | * select_task_rq() can race against ->cpus_allowed | 3418 | * select_task_rq() can race against ->cpus_allowed |
3550 | */ | 3419 | */ |
3551 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | 3420 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && |
3552 | likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { | 3421 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { |
3553 | struct migration_arg arg = { p, dest_cpu }; | 3422 | struct migration_arg arg = { p, dest_cpu }; |
3554 | 3423 | ||
3555 | task_rq_unlock(rq, &flags); | 3424 | task_rq_unlock(rq, &flags); |
@@ -4214,7 +4083,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | |||
4214 | if (task_thread_info(rq->curr) != owner || need_resched()) | 4083 | if (task_thread_info(rq->curr) != owner || need_resched()) |
4215 | return 0; | 4084 | return 0; |
4216 | 4085 | ||
4217 | cpu_relax(); | 4086 | arch_mutex_cpu_relax(); |
4218 | } | 4087 | } |
4219 | 4088 | ||
4220 | return 1; | 4089 | return 1; |
@@ -4526,7 +4395,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); | |||
4526 | * This waits for either a completion of a specific task to be signaled or for a | 4395 | * This waits for either a completion of a specific task to be signaled or for a |
4527 | * specified timeout to expire. It is interruptible. The timeout is in jiffies. | 4396 | * specified timeout to expire. It is interruptible. The timeout is in jiffies. |
4528 | */ | 4397 | */ |
4529 | unsigned long __sched | 4398 | long __sched |
4530 | wait_for_completion_interruptible_timeout(struct completion *x, | 4399 | wait_for_completion_interruptible_timeout(struct completion *x, |
4531 | unsigned long timeout) | 4400 | unsigned long timeout) |
4532 | { | 4401 | { |
@@ -4559,7 +4428,7 @@ EXPORT_SYMBOL(wait_for_completion_killable); | |||
4559 | * signaled or for a specified timeout to expire. It can be | 4428 | * signaled or for a specified timeout to expire. It can be |
4560 | * interrupted by a kill signal. The timeout is in jiffies. | 4429 | * interrupted by a kill signal. The timeout is in jiffies. |
4561 | */ | 4430 | */ |
4562 | unsigned long __sched | 4431 | long __sched |
4563 | wait_for_completion_killable_timeout(struct completion *x, | 4432 | wait_for_completion_killable_timeout(struct completion *x, |
4564 | unsigned long timeout) | 4433 | unsigned long timeout) |
4565 | { | 4434 | { |
@@ -4901,7 +4770,7 @@ static bool check_same_owner(struct task_struct *p) | |||
4901 | } | 4770 | } |
4902 | 4771 | ||
4903 | static int __sched_setscheduler(struct task_struct *p, int policy, | 4772 | static int __sched_setscheduler(struct task_struct *p, int policy, |
4904 | struct sched_param *param, bool user) | 4773 | const struct sched_param *param, bool user) |
4905 | { | 4774 | { |
4906 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 4775 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
4907 | unsigned long flags; | 4776 | unsigned long flags; |
@@ -5056,7 +4925,7 @@ recheck: | |||
5056 | * NOTE that the task may be already dead. | 4925 | * NOTE that the task may be already dead. |
5057 | */ | 4926 | */ |
5058 | int sched_setscheduler(struct task_struct *p, int policy, | 4927 | int sched_setscheduler(struct task_struct *p, int policy, |
5059 | struct sched_param *param) | 4928 | const struct sched_param *param) |
5060 | { | 4929 | { |
5061 | return __sched_setscheduler(p, policy, param, true); | 4930 | return __sched_setscheduler(p, policy, param, true); |
5062 | } | 4931 | } |
@@ -5074,7 +4943,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); | |||
5074 | * but our caller might not have that capability. | 4943 | * but our caller might not have that capability. |
5075 | */ | 4944 | */ |
5076 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, | 4945 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, |
5077 | struct sched_param *param) | 4946 | const struct sched_param *param) |
5078 | { | 4947 | { |
5079 | return __sched_setscheduler(p, policy, param, false); | 4948 | return __sched_setscheduler(p, policy, param, false); |
5080 | } | 4949 | } |
@@ -5590,7 +5459,7 @@ void sched_show_task(struct task_struct *p) | |||
5590 | unsigned state; | 5459 | unsigned state; |
5591 | 5460 | ||
5592 | state = p->state ? __ffs(p->state) + 1 : 0; | 5461 | state = p->state ? __ffs(p->state) + 1 : 0; |
5593 | printk(KERN_INFO "%-13.13s %c", p->comm, | 5462 | printk(KERN_INFO "%-15.15s %c", p->comm, |
5594 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); | 5463 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); |
5595 | #if BITS_PER_LONG == 32 | 5464 | #if BITS_PER_LONG == 32 |
5596 | if (state == TASK_RUNNING) | 5465 | if (state == TASK_RUNNING) |
@@ -5754,7 +5623,6 @@ static void update_sysctl(void) | |||
5754 | SET_SYSCTL(sched_min_granularity); | 5623 | SET_SYSCTL(sched_min_granularity); |
5755 | SET_SYSCTL(sched_latency); | 5624 | SET_SYSCTL(sched_latency); |
5756 | SET_SYSCTL(sched_wakeup_granularity); | 5625 | SET_SYSCTL(sched_wakeup_granularity); |
5757 | SET_SYSCTL(sched_shares_ratelimit); | ||
5758 | #undef SET_SYSCTL | 5626 | #undef SET_SYSCTL |
5759 | } | 5627 | } |
5760 | 5628 | ||
@@ -5830,7 +5698,7 @@ again: | |||
5830 | goto out; | 5698 | goto out; |
5831 | 5699 | ||
5832 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5700 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5833 | if (migrate_task(p, dest_cpu)) { | 5701 | if (migrate_task(p, rq)) { |
5834 | struct migration_arg arg = { p, dest_cpu }; | 5702 | struct migration_arg arg = { p, dest_cpu }; |
5835 | /* Need help from migration thread: drop lock and wait. */ | 5703 | /* Need help from migration thread: drop lock and wait. */ |
5836 | task_rq_unlock(rq, &flags); | 5704 | task_rq_unlock(rq, &flags); |
@@ -5912,29 +5780,20 @@ static int migration_cpu_stop(void *data) | |||
5912 | } | 5780 | } |
5913 | 5781 | ||
5914 | #ifdef CONFIG_HOTPLUG_CPU | 5782 | #ifdef CONFIG_HOTPLUG_CPU |
5783 | |||
5915 | /* | 5784 | /* |
5916 | * Figure out where task on dead CPU should go, use force if necessary. | 5785 | * Ensures that the idle task is using init_mm right before its cpu goes |
5786 | * offline. | ||
5917 | */ | 5787 | */ |
5918 | void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 5788 | void idle_task_exit(void) |
5919 | { | 5789 | { |
5920 | struct rq *rq = cpu_rq(dead_cpu); | 5790 | struct mm_struct *mm = current->active_mm; |
5921 | int needs_cpu, uninitialized_var(dest_cpu); | ||
5922 | unsigned long flags; | ||
5923 | 5791 | ||
5924 | local_irq_save(flags); | 5792 | BUG_ON(cpu_online(smp_processor_id())); |
5925 | 5793 | ||
5926 | raw_spin_lock(&rq->lock); | 5794 | if (mm != &init_mm) |
5927 | needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); | 5795 | switch_mm(mm, &init_mm, current); |
5928 | if (needs_cpu) | 5796 | mmdrop(mm); |
5929 | dest_cpu = select_fallback_rq(dead_cpu, p); | ||
5930 | raw_spin_unlock(&rq->lock); | ||
5931 | /* | ||
5932 | * It can only fail if we race with set_cpus_allowed(), | ||
5933 | * in which case the racer should migrate the task anyway. | ||
5934 | */ | ||
5935 | if (needs_cpu) | ||
5936 | __migrate_task(p, dead_cpu, dest_cpu); | ||
5937 | local_irq_restore(flags); | ||
5938 | } | 5797 | } |
5939 | 5798 | ||
5940 | /* | 5799 | /* |
@@ -5947,128 +5806,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5947 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 5806 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
5948 | { | 5807 | { |
5949 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); | 5808 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
5950 | unsigned long flags; | ||
5951 | 5809 | ||
5952 | local_irq_save(flags); | ||
5953 | double_rq_lock(rq_src, rq_dest); | ||
5954 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; | 5810 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; |
5955 | rq_src->nr_uninterruptible = 0; | 5811 | rq_src->nr_uninterruptible = 0; |
5956 | double_rq_unlock(rq_src, rq_dest); | ||
5957 | local_irq_restore(flags); | ||
5958 | } | ||
5959 | |||
5960 | /* Run through task list and migrate tasks from the dead cpu. */ | ||
5961 | static void migrate_live_tasks(int src_cpu) | ||
5962 | { | ||
5963 | struct task_struct *p, *t; | ||
5964 | |||
5965 | read_lock(&tasklist_lock); | ||
5966 | |||
5967 | do_each_thread(t, p) { | ||
5968 | if (p == current) | ||
5969 | continue; | ||
5970 | |||
5971 | if (task_cpu(p) == src_cpu) | ||
5972 | move_task_off_dead_cpu(src_cpu, p); | ||
5973 | } while_each_thread(t, p); | ||
5974 | |||
5975 | read_unlock(&tasklist_lock); | ||
5976 | } | 5812 | } |
5977 | 5813 | ||
5978 | /* | 5814 | /* |
5979 | * Schedules idle task to be the next runnable task on current CPU. | 5815 | * remove the tasks which were accounted by rq from calc_load_tasks. |
5980 | * It does so by boosting its priority to highest possible. | ||
5981 | * Used by CPU offline code. | ||
5982 | */ | 5816 | */ |
5983 | void sched_idle_next(void) | 5817 | static void calc_global_load_remove(struct rq *rq) |
5984 | { | 5818 | { |
5985 | int this_cpu = smp_processor_id(); | 5819 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); |
5986 | struct rq *rq = cpu_rq(this_cpu); | 5820 | rq->calc_load_active = 0; |
5987 | struct task_struct *p = rq->idle; | ||
5988 | unsigned long flags; | ||
5989 | |||
5990 | /* cpu has to be offline */ | ||
5991 | BUG_ON(cpu_online(this_cpu)); | ||
5992 | |||
5993 | /* | ||
5994 | * Strictly not necessary since rest of the CPUs are stopped by now | ||
5995 | * and interrupts disabled on the current cpu. | ||
5996 | */ | ||
5997 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
5998 | |||
5999 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | ||
6000 | |||
6001 | activate_task(rq, p, 0); | ||
6002 | |||
6003 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
6004 | } | 5821 | } |
6005 | 5822 | ||
6006 | /* | 5823 | /* |
6007 | * Ensures that the idle task is using init_mm right before its cpu goes | 5824 | * Migrate all tasks from the rq; sleeping tasks will be migrated by
6008 | * offline. | 5825 | * try_to_wake_up()->select_task_rq(). |
5826 | * | ||
5827 | * Called with rq->lock held even though we're in stop_machine() and | ||
5828 | * there's no concurrency possible; we hold the required locks anyway | ||
5829 | * because of lock validation efforts. | ||
6009 | */ | 5830 | */ |
6010 | void idle_task_exit(void) | 5831 | static void migrate_tasks(unsigned int dead_cpu) |
6011 | { | ||
6012 | struct mm_struct *mm = current->active_mm; | ||
6013 | |||
6014 | BUG_ON(cpu_online(smp_processor_id())); | ||
6015 | |||
6016 | if (mm != &init_mm) | ||
6017 | switch_mm(mm, &init_mm, current); | ||
6018 | mmdrop(mm); | ||
6019 | } | ||
6020 | |||
6021 | /* called under rq->lock with disabled interrupts */ | ||
6022 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | ||
6023 | { | 5832 | { |
6024 | struct rq *rq = cpu_rq(dead_cpu); | 5833 | struct rq *rq = cpu_rq(dead_cpu); |
6025 | 5834 | struct task_struct *next, *stop = rq->stop; | |
6026 | /* Must be exiting, otherwise would be on tasklist. */ | 5835 | int dest_cpu; |
6027 | BUG_ON(!p->exit_state); | ||
6028 | |||
6029 | /* Cannot have done final schedule yet: would have vanished. */ | ||
6030 | BUG_ON(p->state == TASK_DEAD); | ||
6031 | |||
6032 | get_task_struct(p); | ||
6033 | 5836 | ||
6034 | /* | 5837 | /* |
6035 | * Drop lock around migration; if someone else moves it, | 5838 | * Fudge the rq selection such that the below task selection loop |
6036 | * that's OK. No task can be added to this CPU, so iteration is | 5839 | * doesn't get stuck on the currently eligible stop task. |
6037 | * fine. | 5840 | * |
5841 | * We're currently inside stop_machine() and the rq is either stuck | ||
5842 | * in the stop_machine_cpu_stop() loop, or we're executing this code, | ||
5843 | * either way we should never end up calling schedule() until we're | ||
5844 | * done here. | ||
6038 | */ | 5845 | */ |
6039 | raw_spin_unlock_irq(&rq->lock); | 5846 | rq->stop = NULL; |
6040 | move_task_off_dead_cpu(dead_cpu, p); | ||
6041 | raw_spin_lock_irq(&rq->lock); | ||
6042 | |||
6043 | put_task_struct(p); | ||
6044 | } | ||
6045 | |||
6046 | /* release_task() removes task from tasklist, so we won't find dead tasks. */ | ||
6047 | static void migrate_dead_tasks(unsigned int dead_cpu) | ||
6048 | { | ||
6049 | struct rq *rq = cpu_rq(dead_cpu); | ||
6050 | struct task_struct *next; | ||
6051 | 5847 | ||
6052 | for ( ; ; ) { | 5848 | for ( ; ; ) { |
6053 | if (!rq->nr_running) | 5849 | /* |
5850 | * There's this thread running, bail when that's the only | ||
5851 | * remaining thread. | ||
5852 | */ | ||
5853 | if (rq->nr_running == 1) | ||
6054 | break; | 5854 | break; |
5855 | |||
6055 | next = pick_next_task(rq); | 5856 | next = pick_next_task(rq); |
6056 | if (!next) | 5857 | BUG_ON(!next); |
6057 | break; | ||
6058 | next->sched_class->put_prev_task(rq, next); | 5858 | next->sched_class->put_prev_task(rq, next); |
6059 | migrate_dead(dead_cpu, next); | ||
6060 | 5859 | ||
5860 | /* Find suitable destination for @next, with force if needed. */ | ||
5861 | dest_cpu = select_fallback_rq(dead_cpu, next); | ||
5862 | raw_spin_unlock(&rq->lock); | ||
5863 | |||
5864 | __migrate_task(next, dead_cpu, dest_cpu); | ||
5865 | |||
5866 | raw_spin_lock(&rq->lock); | ||
6061 | } | 5867 | } |
6062 | } | ||
6063 | 5868 | ||
6064 | /* | 5869 | rq->stop = stop; |
6065 | * remove the tasks which were accounted by rq from calc_load_tasks. | ||
6066 | */ | ||
6067 | static void calc_global_load_remove(struct rq *rq) | ||
6068 | { | ||
6069 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); | ||
6070 | rq->calc_load_active = 0; | ||
6071 | } | 5870 | } |
5871 | |||
6072 | #endif /* CONFIG_HOTPLUG_CPU */ | 5872 | #endif /* CONFIG_HOTPLUG_CPU */ |
6073 | 5873 | ||
6074 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) | 5874 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) |
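
The new migrate_tasks() hides rq->stop so pick_next_task() cannot keep returning the always-eligible stop task, drains until only the current thread remains (nr_running == 1), and then restores the pointer. The same hide/drain/restore shape in a stand-alone miniature, with hypothetical types standing in for the runqueue:

/* Hypothetical miniature of migrate_tasks()'s hide/drain/restore shape. */
struct runq_sketch {
	int has_stop;		/* stands in for rq->stop != NULL */
	int nr_running;		/* includes the caller itself     */
};

static void migrate_one(struct runq_sketch *rq)
{
	rq->nr_running--;	/* stand-in for __migrate_task() */
}

static void drain(struct runq_sketch *rq)
{
	int saved = rq->has_stop;

	rq->has_stop = 0;		/* rq->stop = NULL          */
	while (rq->nr_running > 1)	/* bail when only we remain */
		migrate_one(rq);
	rq->has_stop = saved;		/* rq->stop = stop          */
}
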
@@ -6278,15 +6078,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6278 | unsigned long flags; | 6078 | unsigned long flags; |
6279 | struct rq *rq = cpu_rq(cpu); | 6079 | struct rq *rq = cpu_rq(cpu); |
6280 | 6080 | ||
6281 | switch (action) { | 6081 | switch (action & ~CPU_TASKS_FROZEN) { |
6282 | 6082 | ||
6283 | case CPU_UP_PREPARE: | 6083 | case CPU_UP_PREPARE: |
6284 | case CPU_UP_PREPARE_FROZEN: | ||
6285 | rq->calc_load_update = calc_load_update; | 6084 | rq->calc_load_update = calc_load_update; |
6286 | break; | 6085 | break; |
6287 | 6086 | ||
6288 | case CPU_ONLINE: | 6087 | case CPU_ONLINE: |
6289 | case CPU_ONLINE_FROZEN: | ||
6290 | /* Update our root-domain */ | 6088 | /* Update our root-domain */ |
6291 | raw_spin_lock_irqsave(&rq->lock, flags); | 6089 | raw_spin_lock_irqsave(&rq->lock, flags); |
6292 | if (rq->rd) { | 6090 | if (rq->rd) { |
@@ -6298,30 +6096,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6298 | break; | 6096 | break; |
6299 | 6097 | ||
6300 | #ifdef CONFIG_HOTPLUG_CPU | 6098 | #ifdef CONFIG_HOTPLUG_CPU |
6301 | case CPU_DEAD: | ||
6302 | case CPU_DEAD_FROZEN: | ||
6303 | migrate_live_tasks(cpu); | ||
6304 | /* Idle task back to normal (off runqueue, low prio) */ | ||
6305 | raw_spin_lock_irq(&rq->lock); | ||
6306 | deactivate_task(rq, rq->idle, 0); | ||
6307 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | ||
6308 | rq->idle->sched_class = &idle_sched_class; | ||
6309 | migrate_dead_tasks(cpu); | ||
6310 | raw_spin_unlock_irq(&rq->lock); | ||
6311 | migrate_nr_uninterruptible(rq); | ||
6312 | BUG_ON(rq->nr_running != 0); | ||
6313 | calc_global_load_remove(rq); | ||
6314 | break; | ||
6315 | |||
6316 | case CPU_DYING: | 6099 | case CPU_DYING: |
6317 | case CPU_DYING_FROZEN: | ||
6318 | /* Update our root-domain */ | 6100 | /* Update our root-domain */ |
6319 | raw_spin_lock_irqsave(&rq->lock, flags); | 6101 | raw_spin_lock_irqsave(&rq->lock, flags); |
6320 | if (rq->rd) { | 6102 | if (rq->rd) { |
6321 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 6103 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
6322 | set_rq_offline(rq); | 6104 | set_rq_offline(rq); |
6323 | } | 6105 | } |
6106 | migrate_tasks(cpu); | ||
6107 | BUG_ON(rq->nr_running != 1); /* the migration thread */ | ||
6324 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 6108 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6109 | |||
6110 | migrate_nr_uninterruptible(rq); | ||
6111 | calc_global_load_remove(rq); | ||
6325 | break; | 6112 | break; |
6326 | #endif | 6113 | #endif |
6327 | } | 6114 | } |
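
Collapsing the _FROZEN case labels works because each frozen notifier action is the plain action with CPU_TASKS_FROZEN OR-ed in, so masking the flag off funnels both variants onto one label. A stand-alone demo follows; the constants mirror the cpu notifier definitions but are reproduced here only for illustration (real code should use <linux/cpu.h>):

#include <stdio.h>

#define CPU_ONLINE		0x0002
#define CPU_TASKS_FROZEN	0x0010
#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)

int main(void)
{
	unsigned long action = CPU_ONLINE_FROZEN;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:	/* reached for both plain and _FROZEN */
		printf("online path taken for action 0x%lx\n", action);
		break;
	}
	return 0;
}
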
@@ -8052,15 +7839,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
8052 | 7839 | ||
8053 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7840 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8054 | static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | 7841 | static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, |
8055 | struct sched_entity *se, int cpu, int add, | 7842 | struct sched_entity *se, int cpu, |
8056 | struct sched_entity *parent) | 7843 | struct sched_entity *parent) |
8057 | { | 7844 | { |
8058 | struct rq *rq = cpu_rq(cpu); | 7845 | struct rq *rq = cpu_rq(cpu); |
8059 | tg->cfs_rq[cpu] = cfs_rq; | 7846 | tg->cfs_rq[cpu] = cfs_rq; |
8060 | init_cfs_rq(cfs_rq, rq); | 7847 | init_cfs_rq(cfs_rq, rq); |
8061 | cfs_rq->tg = tg; | 7848 | cfs_rq->tg = tg; |
8062 | if (add) | ||
8063 | list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
8064 | 7849 | ||
8065 | tg->se[cpu] = se; | 7850 | tg->se[cpu] = se; |
8066 | /* se could be NULL for init_task_group */ | 7851 | /* se could be NULL for init_task_group */ |
@@ -8073,15 +7858,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
8073 | se->cfs_rq = parent->my_q; | 7858 | se->cfs_rq = parent->my_q; |
8074 | 7859 | ||
8075 | se->my_q = cfs_rq; | 7860 | se->my_q = cfs_rq; |
8076 | se->load.weight = tg->shares; | 7861 | update_load_set(&se->load, 0); |
8077 | se->load.inv_weight = 0; | ||
8078 | se->parent = parent; | 7862 | se->parent = parent; |
8079 | } | 7863 | } |
8080 | #endif | 7864 | #endif |
8081 | 7865 | ||
8082 | #ifdef CONFIG_RT_GROUP_SCHED | 7866 | #ifdef CONFIG_RT_GROUP_SCHED |
8083 | static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | 7867 | static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, |
8084 | struct sched_rt_entity *rt_se, int cpu, int add, | 7868 | struct sched_rt_entity *rt_se, int cpu, |
8085 | struct sched_rt_entity *parent) | 7869 | struct sched_rt_entity *parent) |
8086 | { | 7870 | { |
8087 | struct rq *rq = cpu_rq(cpu); | 7871 | struct rq *rq = cpu_rq(cpu); |
@@ -8090,8 +7874,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
8090 | init_rt_rq(rt_rq, rq); | 7874 | init_rt_rq(rt_rq, rq); |
8091 | rt_rq->tg = tg; | 7875 | rt_rq->tg = tg; |
8092 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | 7876 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; |
8093 | if (add) | ||
8094 | list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); | ||
8095 | 7877 | ||
8096 | tg->rt_se[cpu] = rt_se; | 7878 | tg->rt_se[cpu] = rt_se; |
8097 | if (!rt_se) | 7879 | if (!rt_se) |
@@ -8164,13 +7946,9 @@ void __init sched_init(void) | |||
8164 | #ifdef CONFIG_CGROUP_SCHED | 7946 | #ifdef CONFIG_CGROUP_SCHED |
8165 | list_add(&init_task_group.list, &task_groups); | 7947 | list_add(&init_task_group.list, &task_groups); |
8166 | INIT_LIST_HEAD(&init_task_group.children); | 7948 | INIT_LIST_HEAD(&init_task_group.children); |
8167 | 7949 | autogroup_init(&init_task); | |
8168 | #endif /* CONFIG_CGROUP_SCHED */ | 7950 | #endif /* CONFIG_CGROUP_SCHED */ |
8169 | 7951 | ||
8170 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
8171 | update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), | ||
8172 | __alignof__(unsigned long)); | ||
8173 | #endif | ||
8174 | for_each_possible_cpu(i) { | 7952 | for_each_possible_cpu(i) { |
8175 | struct rq *rq; | 7953 | struct rq *rq; |
8176 | 7954 | ||
@@ -8184,7 +7962,6 @@ void __init sched_init(void) | |||
8184 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7962 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8185 | init_task_group.shares = init_task_group_load; | 7963 | init_task_group.shares = init_task_group_load; |
8186 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | 7964 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
8187 | #ifdef CONFIG_CGROUP_SCHED | ||
8188 | /* | 7965 | /* |
8189 | * How much cpu bandwidth does init_task_group get? | 7966 | * How much cpu bandwidth does init_task_group get? |
8190 | * | 7967 | * |
@@ -8204,16 +7981,13 @@ void __init sched_init(void) | |||
8204 | * We achieve this by letting init_task_group's tasks sit | 7981 | * We achieve this by letting init_task_group's tasks sit |
8205 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). | 7982 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). |
8206 | */ | 7983 | */ |
8207 | init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL); | 7984 | init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL); |
8208 | #endif | ||
8209 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7985 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8210 | 7986 | ||
8211 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; | 7987 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; |
8212 | #ifdef CONFIG_RT_GROUP_SCHED | 7988 | #ifdef CONFIG_RT_GROUP_SCHED |
8213 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7989 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
8214 | #ifdef CONFIG_CGROUP_SCHED | 7990 | init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL); |
8215 | init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL); | ||
8216 | #endif | ||
8217 | #endif | 7991 | #endif |
8218 | 7992 | ||
8219 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 7993 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
@@ -8486,7 +8260,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8486 | if (!se) | 8260 | if (!se) |
8487 | goto err_free_rq; | 8261 | goto err_free_rq; |
8488 | 8262 | ||
8489 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 8263 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); |
8490 | } | 8264 | } |
8491 | 8265 | ||
8492 | return 1; | 8266 | return 1; |
@@ -8497,15 +8271,21 @@ err: | |||
8497 | return 0; | 8271 | return 0; |
8498 | } | 8272 | } |
8499 | 8273 | ||
8500 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
8501 | { | ||
8502 | list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, | ||
8503 | &cpu_rq(cpu)->leaf_cfs_rq_list); | ||
8504 | } | ||
8505 | |||
8506 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8274 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8507 | { | 8275 | { |
8508 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | 8276 | struct rq *rq = cpu_rq(cpu); |
8277 | unsigned long flags; | ||
8278 | |||
8279 | /* | ||
8280 | * Only empty task groups can be destroyed; so we can speculatively | ||
8281 | * check on_list without danger of it being re-added. | ||
8282 | */ | ||
8283 | if (!tg->cfs_rq[cpu]->on_list) | ||
8284 | return; | ||
8285 | |||
8286 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8287 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | ||
8288 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8509 | } | 8289 | } |
8510 | #else /* !CONFIG_FAIR_GROUP_SCHED */ | 8290 | #else /* !CONFIG_FAIR_GROUP_SCHED */
8511 | static inline void free_fair_sched_group(struct task_group *tg) | 8291 | static inline void free_fair_sched_group(struct task_group *tg) |
@@ -8518,10 +8298,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8518 | return 1; | 8298 | return 1; |
8519 | } | 8299 | } |
8520 | 8300 | ||
8521 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
8522 | { | ||
8523 | } | ||
8524 | |||
8525 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8301 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8526 | { | 8302 | { |
8527 | } | 8303 | } |
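
The on_list test in the new unregister path runs before the lock is taken, which is safe only because of the invariant stated in its comment: a group being destroyed is empty, so the flag can no longer flip back to 1 underneath us. The same check-then-lock shape in a generic, hypothetical form:

#include <pthread.h>

/* Hypothetical sketch of check-outside, lock-for-removal. Safe only
 * when the flag can no longer flip 0 -> 1, as argued above. */
struct node {
	int on_list;
	struct node *prev, *next;
};

static void remove_if_queued(struct node *n, pthread_mutex_t *lock)
{
	if (!n->on_list)		/* speculative, lock-free check */
		return;

	pthread_mutex_lock(lock);	/* lock only for the list surgery */
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->on_list = 0;
	pthread_mutex_unlock(lock);
}
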
@@ -8576,7 +8352,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8576 | if (!rt_se) | 8352 | if (!rt_se) |
8577 | goto err_free_rq; | 8353 | goto err_free_rq; |
8578 | 8354 | ||
8579 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 8355 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); |
8580 | } | 8356 | } |
8581 | 8357 | ||
8582 | return 1; | 8358 | return 1; |
@@ -8586,17 +8362,6 @@ err_free_rq: | |||
8586 | err: | 8362 | err: |
8587 | return 0; | 8363 | return 0; |
8588 | } | 8364 | } |
8589 | |||
8590 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
8591 | { | ||
8592 | list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, | ||
8593 | &cpu_rq(cpu)->leaf_rt_rq_list); | ||
8594 | } | ||
8595 | |||
8596 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
8597 | { | ||
8598 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | ||
8599 | } | ||
8600 | #else /* !CONFIG_RT_GROUP_SCHED */ | 8365 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8601 | static inline void free_rt_sched_group(struct task_group *tg) | 8366 | static inline void free_rt_sched_group(struct task_group *tg) |
8602 | { | 8367 | { |
@@ -8607,14 +8372,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8607 | { | 8372 | { |
8608 | return 1; | 8373 | return 1; |
8609 | } | 8374 | } |
8610 | |||
8611 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
8612 | { | ||
8613 | } | ||
8614 | |||
8615 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
8616 | { | ||
8617 | } | ||
8618 | #endif /* CONFIG_RT_GROUP_SCHED */ | 8375 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8619 | 8376 | ||
8620 | #ifdef CONFIG_CGROUP_SCHED | 8377 | #ifdef CONFIG_CGROUP_SCHED |
@@ -8630,7 +8387,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
8630 | { | 8387 | { |
8631 | struct task_group *tg; | 8388 | struct task_group *tg; |
8632 | unsigned long flags; | 8389 | unsigned long flags; |
8633 | int i; | ||
8634 | 8390 | ||
8635 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 8391 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
8636 | if (!tg) | 8392 | if (!tg) |
@@ -8643,10 +8399,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
8643 | goto err; | 8399 | goto err; |
8644 | 8400 | ||
8645 | spin_lock_irqsave(&task_group_lock, flags); | 8401 | spin_lock_irqsave(&task_group_lock, flags); |
8646 | for_each_possible_cpu(i) { | ||
8647 | register_fair_sched_group(tg, i); | ||
8648 | register_rt_sched_group(tg, i); | ||
8649 | } | ||
8650 | list_add_rcu(&tg->list, &task_groups); | 8402 | list_add_rcu(&tg->list, &task_groups); |
8651 | 8403 | ||
8652 | WARN_ON(!parent); /* root should already exist */ | 8404 | WARN_ON(!parent); /* root should already exist */ |
@@ -8676,11 +8428,11 @@ void sched_destroy_group(struct task_group *tg) | |||
8676 | unsigned long flags; | 8428 | unsigned long flags; |
8677 | int i; | 8429 | int i; |
8678 | 8430 | ||
8679 | spin_lock_irqsave(&task_group_lock, flags); | 8431 | /* end participation in shares distribution */ |
8680 | for_each_possible_cpu(i) { | 8432 | for_each_possible_cpu(i) |
8681 | unregister_fair_sched_group(tg, i); | 8433 | unregister_fair_sched_group(tg, i); |
8682 | unregister_rt_sched_group(tg, i); | 8434 | |
8683 | } | 8435 | spin_lock_irqsave(&task_group_lock, flags); |
8684 | list_del_rcu(&tg->list); | 8436 | list_del_rcu(&tg->list); |
8685 | list_del_rcu(&tg->siblings); | 8437 | list_del_rcu(&tg->siblings); |
8686 | spin_unlock_irqrestore(&task_group_lock, flags); | 8438 | spin_unlock_irqrestore(&task_group_lock, flags); |
@@ -8727,33 +8479,6 @@ void sched_move_task(struct task_struct *tsk) | |||
8727 | #endif /* CONFIG_CGROUP_SCHED */ | 8479 | #endif /* CONFIG_CGROUP_SCHED */ |
8728 | 8480 | ||
8729 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8481 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8730 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | ||
8731 | { | ||
8732 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8733 | int on_rq; | ||
8734 | |||
8735 | on_rq = se->on_rq; | ||
8736 | if (on_rq) | ||
8737 | dequeue_entity(cfs_rq, se, 0); | ||
8738 | |||
8739 | se->load.weight = shares; | ||
8740 | se->load.inv_weight = 0; | ||
8741 | |||
8742 | if (on_rq) | ||
8743 | enqueue_entity(cfs_rq, se, 0); | ||
8744 | } | ||
8745 | |||
8746 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | ||
8747 | { | ||
8748 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8749 | struct rq *rq = cfs_rq->rq; | ||
8750 | unsigned long flags; | ||
8751 | |||
8752 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8753 | __set_se_shares(se, shares); | ||
8754 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8755 | } | ||
8756 | |||
8757 | static DEFINE_MUTEX(shares_mutex); | 8482 | static DEFINE_MUTEX(shares_mutex); |
8758 | 8483 | ||
8759 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | 8484 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) |
@@ -8776,37 +8501,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8776 | if (tg->shares == shares) | 8501 | if (tg->shares == shares) |
8777 | goto done; | 8502 | goto done; |
8778 | 8503 | ||
8779 | spin_lock_irqsave(&task_group_lock, flags); | ||
8780 | for_each_possible_cpu(i) | ||
8781 | unregister_fair_sched_group(tg, i); | ||
8782 | list_del_rcu(&tg->siblings); | ||
8783 | spin_unlock_irqrestore(&task_group_lock, flags); | ||
8784 | |||
8785 | /* wait for any ongoing reference to this group to finish */ | ||
8786 | synchronize_sched(); | ||
8787 | |||
8788 | /* | ||
8789 | * Now we are free to modify the group's share on each cpu | ||
8790 | * w/o tripping rebalance_share or load_balance_fair. | ||
8791 | */ | ||
8792 | tg->shares = shares; | 8504 | tg->shares = shares; |
8793 | for_each_possible_cpu(i) { | 8505 | for_each_possible_cpu(i) { |
8794 | /* | 8506 | struct rq *rq = cpu_rq(i); |
8795 | * force a rebalance | 8507 | struct sched_entity *se; |
8796 | */ | 8508 | |
8797 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | 8509 | se = tg->se[i]; |
8798 | set_se_shares(tg->se[i], shares); | 8510 | /* Propagate contribution to hierarchy */ |
8511 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8512 | for_each_sched_entity(se) | ||
8513 | update_cfs_shares(group_cfs_rq(se), 0); | ||
8514 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8799 | } | 8515 | } |
8800 | 8516 | ||
8801 | /* | ||
8802 | * Enable load balance activity on this group, by inserting it back on | ||
8803 | * each cpu's rq->leaf_cfs_rq_list. | ||
8804 | */ | ||
8805 | spin_lock_irqsave(&task_group_lock, flags); | ||
8806 | for_each_possible_cpu(i) | ||
8807 | register_fair_sched_group(tg, i); | ||
8808 | list_add_rcu(&tg->siblings, &tg->parent->children); | ||
8809 | spin_unlock_irqrestore(&task_group_lock, flags); | ||
8810 | done: | 8517 | done: |
8811 | mutex_unlock(&shares_mutex); | 8518 | mutex_unlock(&shares_mutex); |
8812 | return 0; | 8519 | return 0; |
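
The rewritten sched_group_set_shares() drops the unregister / synchronize_sched() / re-register dance entirely: it takes each cpu's rq->lock and pushes the new weight up the entity hierarchy, since for_each_sched_entity() follows the se->parent links. A hypothetical sketch of that upward walk, with recompute_weight() as a mere placeholder for what update_cfs_shares() actually computes:

struct entity {
	struct entity *parent;
	unsigned long weight;
};

/* Placeholder for update_cfs_shares()'s actual recalculation. */
static unsigned long recompute_weight(struct entity *e)
{
	return e->weight;
}

static void propagate_up(struct entity *se)
{
	/* Mirrors for_each_sched_entity(): walk se->parent to the root. */
	for (; se; se = se->parent)
		se->weight = recompute_weight(se);
}
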
@@ -9532,72 +9239,3 @@ struct cgroup_subsys cpuacct_subsys = { | |||
9532 | }; | 9239 | }; |
9533 | #endif /* CONFIG_CGROUP_CPUACCT */ | 9240 | #endif /* CONFIG_CGROUP_CPUACCT */ |
9534 | 9241 | ||
9535 | #ifndef CONFIG_SMP | ||
9536 | |||
9537 | void synchronize_sched_expedited(void) | ||
9538 | { | ||
9539 | barrier(); | ||
9540 | } | ||
9541 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
9542 | |||
9543 | #else /* #ifndef CONFIG_SMP */ | ||
9544 | |||
9545 | static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); | ||
9546 | |||
9547 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
9548 | { | ||
9549 | /* | ||
9550 | * There must be a full memory barrier on each affected CPU | ||
9551 | * between the time that try_stop_cpus() is called and the | ||
9552 | * time that it returns. | ||
9553 | * | ||
9554 | * In the current initial implementation of cpu_stop, the | ||
9555 | * above condition is already met when the control reaches | ||
9556 | * this point and the following smp_mb() is not strictly | ||
9557 | * necessary. Do smp_mb() anyway for documentation and | ||
9558 | * robustness against future implementation changes. | ||
9559 | */ | ||
9560 | smp_mb(); /* See above comment block. */ | ||
9561 | return 0; | ||
9562 | } | ||
9563 | |||
9564 | /* | ||
9565 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | ||
9566 | * approach to force grace period to end quickly. This consumes | ||
9567 | * significant time on all CPUs, and is thus not recommended for | ||
9568 | * any sort of common-case code. | ||
9569 | * | ||
9570 | * Note that it is illegal to call this function while holding any | ||
9571 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
9572 | * observe this restriction will result in deadlock. | ||
9573 | */ | ||
9574 | void synchronize_sched_expedited(void) | ||
9575 | { | ||
9576 | int snap, trycount = 0; | ||
9577 | |||
9578 | smp_mb(); /* ensure prior mod happens before capturing snap. */ | ||
9579 | snap = atomic_read(&synchronize_sched_expedited_count) + 1; | ||
9580 | get_online_cpus(); | ||
9581 | while (try_stop_cpus(cpu_online_mask, | ||
9582 | synchronize_sched_expedited_cpu_stop, | ||
9583 | NULL) == -EAGAIN) { | ||
9584 | put_online_cpus(); | ||
9585 | if (trycount++ < 10) | ||
9586 | udelay(trycount * num_online_cpus()); | ||
9587 | else { | ||
9588 | synchronize_sched(); | ||
9589 | return; | ||
9590 | } | ||
9591 | if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { | ||
9592 | smp_mb(); /* ensure test happens before caller kfree */ | ||
9593 | return; | ||
9594 | } | ||
9595 | get_online_cpus(); | ||
9596 | } | ||
9597 | atomic_inc(&synchronize_sched_expedited_count); | ||
9598 | smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ | ||
9599 | put_online_cpus(); | ||
9600 | } | ||
9601 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
9602 | |||
9603 | #endif /* #else #ifndef CONFIG_SMP */ | ||
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c new file mode 100644 index 000000000000..c80fedcd476b --- /dev/null +++ b/kernel/sched_autogroup.c | |||
@@ -0,0 +1,238 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2 | |||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/utsname.h> | ||
7 | |||
8 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; | ||
9 | static struct autogroup autogroup_default; | ||
10 | static atomic_t autogroup_seq_nr; | ||
11 | |||
12 | static void autogroup_init(struct task_struct *init_task) | ||
13 | { | ||
14 | autogroup_default.tg = &init_task_group; | ||
15 | init_task_group.autogroup = &autogroup_default; | ||
16 | kref_init(&autogroup_default.kref); | ||
17 | init_rwsem(&autogroup_default.lock); | ||
18 | init_task->signal->autogroup = &autogroup_default; | ||
19 | } | ||
20 | |||
21 | static inline void autogroup_free(struct task_group *tg) | ||
22 | { | ||
23 | kfree(tg->autogroup); | ||
24 | } | ||
25 | |||
26 | static inline void autogroup_destroy(struct kref *kref) | ||
27 | { | ||
28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | ||
29 | |||
30 | sched_destroy_group(ag->tg); | ||
31 | } | ||
32 | |||
33 | static inline void autogroup_kref_put(struct autogroup *ag) | ||
34 | { | ||
35 | kref_put(&ag->kref, autogroup_destroy); | ||
36 | } | ||
37 | |||
38 | static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) | ||
39 | { | ||
40 | kref_get(&ag->kref); | ||
41 | return ag; | ||
42 | } | ||
43 | |||
44 | static inline struct autogroup *autogroup_task_get(struct task_struct *p) | ||
45 | { | ||
46 | struct autogroup *ag; | ||
47 | unsigned long flags; | ||
48 | |||
49 | if (!lock_task_sighand(p, &flags)) | ||
50 | return autogroup_kref_get(&autogroup_default); | ||
51 | |||
52 | ag = autogroup_kref_get(p->signal->autogroup); | ||
53 | unlock_task_sighand(p, &flags); | ||
54 | |||
55 | return ag; | ||
56 | } | ||
57 | |||
58 | static inline struct autogroup *autogroup_create(void) | ||
59 | { | ||
60 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | ||
61 | struct task_group *tg; | ||
62 | |||
63 | if (!ag) | ||
64 | goto out_fail; | ||
65 | |||
66 | tg = sched_create_group(&init_task_group); | ||
67 | |||
68 | if (IS_ERR(tg)) | ||
69 | goto out_free; | ||
70 | |||
71 | kref_init(&ag->kref); | ||
72 | init_rwsem(&ag->lock); | ||
73 | ag->id = atomic_inc_return(&autogroup_seq_nr); | ||
74 | ag->tg = tg; | ||
75 | tg->autogroup = ag; | ||
76 | |||
77 | return ag; | ||
78 | |||
79 | out_free: | ||
80 | kfree(ag); | ||
81 | out_fail: | ||
82 | if (printk_ratelimit()) { | ||
83 | printk(KERN_WARNING "autogroup_create: %s failure.\n", | ||
84 | ag ? "sched_create_group()" : "kzalloc()"); | ||
85 | } | ||
86 | |||
87 | return autogroup_kref_get(&autogroup_default); | ||
88 | } | ||
89 | |||
90 | static inline bool | ||
91 | task_wants_autogroup(struct task_struct *p, struct task_group *tg) | ||
92 | { | ||
93 | if (tg != &root_task_group) | ||
94 | return false; | ||
95 | |||
96 | if (p->sched_class != &fair_sched_class) | ||
97 | return false; | ||
98 | |||
99 | /* | ||
100 | * We can only assume the task group can't go away on us if | ||
101 | * autogroup_move_group() can see us on ->thread_group list. | ||
102 | */ | ||
103 | if (p->flags & PF_EXITING) | ||
104 | return false; | ||
105 | |||
106 | return true; | ||
107 | } | ||
108 | |||
109 | static inline struct task_group * | ||
110 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | ||
111 | { | ||
112 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
113 | |||
114 | if (enabled && task_wants_autogroup(p, tg)) | ||
115 | return p->signal->autogroup->tg; | ||
116 | |||
117 | return tg; | ||
118 | } | ||
119 | |||
120 | static void | ||
121 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) | ||
122 | { | ||
123 | struct autogroup *prev; | ||
124 | struct task_struct *t; | ||
125 | unsigned long flags; | ||
126 | |||
127 | BUG_ON(!lock_task_sighand(p, &flags)); | ||
128 | |||
129 | prev = p->signal->autogroup; | ||
130 | if (prev == ag) { | ||
131 | unlock_task_sighand(p, &flags); | ||
132 | return; | ||
133 | } | ||
134 | |||
135 | p->signal->autogroup = autogroup_kref_get(ag); | ||
136 | |||
137 | t = p; | ||
138 | do { | ||
139 | sched_move_task(t); | ||
140 | } while_each_thread(p, t); | ||
141 | |||
142 | unlock_task_sighand(p, &flags); | ||
143 | autogroup_kref_put(prev); | ||
144 | } | ||
145 | |||
146 | /* Allocates GFP_KERNEL, cannot be called under any spinlock */ | ||
147 | void sched_autogroup_create_attach(struct task_struct *p) | ||
148 | { | ||
149 | struct autogroup *ag = autogroup_create(); | ||
150 | |||
151 | autogroup_move_group(p, ag); | ||
152 | /* drop extra reference added by autogroup_create() */ | ||
153 | autogroup_kref_put(ag); | ||
154 | } | ||
155 | EXPORT_SYMBOL(sched_autogroup_create_attach); | ||
156 | |||
157 | /* Cannot be called under siglock. Currently has no users */ | ||
158 | void sched_autogroup_detach(struct task_struct *p) | ||
159 | { | ||
160 | autogroup_move_group(p, &autogroup_default); | ||
161 | } | ||
162 | EXPORT_SYMBOL(sched_autogroup_detach); | ||
163 | |||
164 | void sched_autogroup_fork(struct signal_struct *sig) | ||
165 | { | ||
166 | sig->autogroup = autogroup_task_get(current); | ||
167 | } | ||
168 | |||
169 | void sched_autogroup_exit(struct signal_struct *sig) | ||
170 | { | ||
171 | autogroup_kref_put(sig->autogroup); | ||
172 | } | ||
173 | |||
174 | static int __init setup_autogroup(char *str) | ||
175 | { | ||
176 | sysctl_sched_autogroup_enabled = 0; | ||
177 | |||
178 | return 1; | ||
179 | } | ||
180 | |||
181 | __setup("noautogroup", setup_autogroup); | ||
182 | |||
183 | #ifdef CONFIG_PROC_FS | ||
184 | |||
185 | int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) | ||
186 | { | ||
187 | static unsigned long next = INITIAL_JIFFIES; | ||
188 | struct autogroup *ag; | ||
189 | int err; | ||
190 | |||
191 | if (*nice < -20 || *nice > 19) | ||
192 | return -EINVAL; | ||
193 | |||
194 | err = security_task_setnice(current, *nice); | ||
195 | if (err) | ||
196 | return err; | ||
197 | |||
198 | if (*nice < 0 && !can_nice(current, *nice)) | ||
199 | return -EPERM; | ||
200 | |||
201 | /* this is a heavy operation taking global locks... */ | ||
202 | if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) | ||
203 | return -EAGAIN; | ||
204 | |||
205 | next = HZ / 10 + jiffies; | ||
206 | ag = autogroup_task_get(p); | ||
207 | |||
208 | down_write(&ag->lock); | ||
209 | err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); | ||
210 | if (!err) | ||
211 | ag->nice = *nice; | ||
212 | up_write(&ag->lock); | ||
213 | |||
214 | autogroup_kref_put(ag); | ||
215 | |||
216 | return err; | ||
217 | } | ||
218 | |||
219 | void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | ||
220 | { | ||
221 | struct autogroup *ag = autogroup_task_get(p); | ||
222 | |||
223 | down_read(&ag->lock); | ||
224 | seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); | ||
225 | up_read(&ag->lock); | ||
226 | |||
227 | autogroup_kref_put(ag); | ||
228 | } | ||
229 | #endif /* CONFIG_PROC_FS */ | ||
230 | |||
231 | #ifdef CONFIG_SCHED_DEBUG | ||
232 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||
233 | { | ||
234 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | ||
235 | } | ||
236 | #endif /* CONFIG_SCHED_DEBUG */ | ||
237 | |||
238 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
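
The autogroup lifetime above is plain kref reference counting: each signal_struct owns one reference, transient users take their own through autogroup_kref_get(), and autogroup_destroy() runs only when the final autogroup_kref_put() drops the count to zero. The same pattern in standalone form, with hypothetical names:

    struct obj {
            struct kref kref;
            /* ... payload ... */
    };

    static void obj_release(struct kref *kref)
    {
            struct obj *o = container_of(kref, struct obj, kref);

            kfree(o);                  /* reached only on the last put */
    }

    static struct obj *obj_get(struct obj *o)
    {
            kref_get(&o->kref);
            return o;
    }

    static void obj_put(struct obj *o)
    {
            kref_put(&o->kref, obj_release);
    }
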
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h new file mode 100644 index 000000000000..5358e241cb20 --- /dev/null +++ b/kernel/sched_autogroup.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2 | |||
3 | struct autogroup { | ||
4 | struct kref kref; | ||
5 | struct task_group *tg; | ||
6 | struct rw_semaphore lock; | ||
7 | unsigned long id; | ||
8 | int nice; | ||
9 | }; | ||
10 | |||
11 | static inline struct task_group * | ||
12 | autogroup_task_group(struct task_struct *p, struct task_group *tg); | ||
13 | |||
14 | #else /* !CONFIG_SCHED_AUTOGROUP */ | ||
15 | |||
16 | static inline void autogroup_init(struct task_struct *init_task) { } | ||
17 | static inline void autogroup_free(struct task_group *tg) { } | ||
18 | |||
19 | static inline struct task_group * | ||
20 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | ||
21 | { | ||
22 | return tg; | ||
23 | } | ||
24 | |||
25 | #ifdef CONFIG_SCHED_DEBUG | ||
26 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||
27 | { | ||
28 | return 0; | ||
29 | } | ||
30 | #endif | ||
31 | |||
32 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 52f1a149bfb1..9d8af0b3fb64 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(sched_clock); | 80 | EXPORT_SYMBOL_GPL(sched_clock); |
81 | 81 | ||
82 | static __read_mostly int sched_clock_running; | 82 | __read_mostly int sched_clock_running; |
83 | 83 | ||
84 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | 84 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
85 | __read_mostly int sched_clock_stable; | 85 | __read_mostly int sched_clock_stable; |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 2e1b0d17dd9b..1dfae3d014b5 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec) | |||
54 | #define SPLIT_NS(x) nsec_high(x), nsec_low(x) | 54 | #define SPLIT_NS(x) nsec_high(x), nsec_low(x) |
55 | 55 | ||
56 | #ifdef CONFIG_FAIR_GROUP_SCHED | 56 | #ifdef CONFIG_FAIR_GROUP_SCHED |
57 | static void print_cfs_group_stats(struct seq_file *m, int cpu, | 57 | static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) |
58 | struct task_group *tg) | ||
59 | { | 58 | { |
60 | struct sched_entity *se = tg->se[cpu]; | 59 | struct sched_entity *se = tg->se[cpu]; |
61 | if (!se) | 60 | if (!se) |
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
110 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 109 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); |
111 | #endif | 110 | #endif |
112 | 111 | ||
113 | #ifdef CONFIG_CGROUP_SCHED | ||
114 | { | ||
115 | char path[64]; | ||
116 | |||
117 | rcu_read_lock(); | ||
118 | cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); | ||
119 | rcu_read_unlock(); | ||
120 | SEQ_printf(m, " %s", path); | ||
121 | } | ||
122 | #endif | ||
123 | SEQ_printf(m, "\n"); | 112 | SEQ_printf(m, "\n"); |
124 | } | 113 | } |
125 | 114 | ||
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
147 | read_unlock_irqrestore(&tasklist_lock, flags); | 136 | read_unlock_irqrestore(&tasklist_lock, flags); |
148 | } | 137 | } |
149 | 138 | ||
150 | #if defined(CONFIG_CGROUP_SCHED) && \ | ||
151 | (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) | ||
152 | static void task_group_path(struct task_group *tg, char *buf, int buflen) | ||
153 | { | ||
154 | /* may be NULL if the underlying cgroup isn't fully-created yet */ | ||
155 | if (!tg->css.cgroup) { | ||
156 | buf[0] = '\0'; | ||
157 | return; | ||
158 | } | ||
159 | cgroup_path(tg->css.cgroup, buf, buflen); | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | 139 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) |
164 | { | 140 | { |
165 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, | 141 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, |
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
168 | struct sched_entity *last; | 144 | struct sched_entity *last; |
169 | unsigned long flags; | 145 | unsigned long flags; |
170 | 146 | ||
171 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) | ||
172 | char path[128]; | ||
173 | struct task_group *tg = cfs_rq->tg; | ||
174 | |||
175 | task_group_path(tg, path, sizeof(path)); | ||
176 | |||
177 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); | ||
178 | #else | ||
179 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | 147 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); |
180 | #endif | ||
181 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 148 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
182 | SPLIT_NS(cfs_rq->exec_clock)); | 149 | SPLIT_NS(cfs_rq->exec_clock)); |
183 | 150 | ||
@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
202 | spread0 = min_vruntime - rq0_min_vruntime; | 169 | spread0 = min_vruntime - rq0_min_vruntime; |
203 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", | 170 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", |
204 | SPLIT_NS(spread0)); | 171 | SPLIT_NS(spread0)); |
205 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); | ||
206 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); | ||
207 | |||
208 | SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", | 172 | SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", |
209 | cfs_rq->nr_spread_over); | 173 | cfs_rq->nr_spread_over); |
174 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); | ||
175 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); | ||
210 | #ifdef CONFIG_FAIR_GROUP_SCHED | 176 | #ifdef CONFIG_FAIR_GROUP_SCHED |
211 | #ifdef CONFIG_SMP | 177 | #ifdef CONFIG_SMP |
212 | SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); | 178 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", |
179 | SPLIT_NS(cfs_rq->load_avg)); | ||
180 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", | ||
181 | SPLIT_NS(cfs_rq->load_period)); | ||
182 | SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", | ||
183 | cfs_rq->load_contribution); | ||
184 | SEQ_printf(m, " .%-30s: %d\n", "load_tg", | ||
185 | atomic_read(&cfs_rq->tg->load_weight)); | ||
213 | #endif | 186 | #endif |
187 | |||
214 | print_cfs_group_stats(m, cpu, cfs_rq->tg); | 188 | print_cfs_group_stats(m, cpu, cfs_rq->tg); |
215 | #endif | 189 | #endif |
216 | } | 190 | } |
217 | 191 | ||
218 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | 192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
219 | { | 193 | { |
220 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) | ||
221 | char path[128]; | ||
222 | struct task_group *tg = rt_rq->tg; | ||
223 | |||
224 | task_group_path(tg, path, sizeof(path)); | ||
225 | |||
226 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); | ||
227 | #else | ||
228 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | 194 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); |
229 | #endif | ||
230 | |||
231 | 195 | ||
232 | #define P(x) \ | 196 | #define P(x) \ |
233 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | 197 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |||
243 | #undef P | 207 | #undef P |
244 | } | 208 | } |
245 | 209 | ||
210 | extern __read_mostly int sched_clock_running; | ||
211 | |||
246 | static void print_cpu(struct seq_file *m, int cpu) | 212 | static void print_cpu(struct seq_file *m, int cpu) |
247 | { | 213 | { |
248 | struct rq *rq = cpu_rq(cpu); | 214 | struct rq *rq = cpu_rq(cpu); |
@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = { | |||
314 | 280 | ||
315 | static int sched_debug_show(struct seq_file *m, void *v) | 281 | static int sched_debug_show(struct seq_file *m, void *v) |
316 | { | 282 | { |
317 | u64 now = ktime_to_ns(ktime_get()); | 283 | u64 ktime, sched_clk, cpu_clk; |
284 | unsigned long flags; | ||
318 | int cpu; | 285 | int cpu; |
319 | 286 | ||
320 | SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n", | 287 | local_irq_save(flags); |
288 | ktime = ktime_to_ns(ktime_get()); | ||
289 | sched_clk = sched_clock(); | ||
290 | cpu_clk = local_clock(); | ||
291 | local_irq_restore(flags); | ||
292 | |||
293 | SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n", | ||
321 | init_utsname()->release, | 294 | init_utsname()->release, |
322 | (int)strcspn(init_utsname()->version, " "), | 295 | (int)strcspn(init_utsname()->version, " "), |
323 | init_utsname()->version); | 296 | init_utsname()->version); |
324 | 297 | ||
325 | SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); | 298 | #define P(x) \ |
299 | SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) | ||
300 | #define PN(x) \ | ||
301 | SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) | ||
302 | PN(ktime); | ||
303 | PN(sched_clk); | ||
304 | PN(cpu_clk); | ||
305 | P(jiffies); | ||
306 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | ||
307 | P(sched_clock_stable); | ||
308 | #endif | ||
309 | #undef PN | ||
310 | #undef P | ||
311 | |||
312 | SEQ_printf(m, "\n"); | ||
313 | SEQ_printf(m, "sysctl_sched\n"); | ||
326 | 314 | ||
327 | #define P(x) \ | 315 | #define P(x) \ |
328 | SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) | 316 | SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) |
329 | #define PN(x) \ | 317 | #define PN(x) \ |
330 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) | 318 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) |
331 | P(jiffies); | ||
332 | PN(sysctl_sched_latency); | 319 | PN(sysctl_sched_latency); |
333 | PN(sysctl_sched_min_granularity); | 320 | PN(sysctl_sched_min_granularity); |
334 | PN(sysctl_sched_wakeup_granularity); | 321 | PN(sysctl_sched_wakeup_granularity); |
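
The P()/PN() helpers use the preprocessor's stringizing operator so each statistic prints under its own symbol name, and SPLIT_NS() renders a nanosecond count as a msec.usec pair. A simplified userspace analog of the idiom:

    #include <stdio.h>

    #define SPLIT_NS(x) ((long long)(x) / 1000000), ((long)((x) % 1000000))
    #define P(x)  printf("%-40s: %lld\n", #x, (long long)(x))
    #define PN(x) printf("%-40s: %lld.%06ld\n", #x, SPLIT_NS(x))

    int main(void)
    {
            unsigned long long sched_clk = 123456789012ULL;

            P(sched_clk);      /* sched_clk ...: 123456789012 */
            PN(sched_clk);     /* sched_clk ...: 123456.789012 */
            return 0;
    }
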
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 00ebd7686676..c62ebae65cf0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; | |||
89 | 89 | ||
90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
91 | 91 | ||
92 | /* | ||
93 | * The exponential sliding window over which load is averaged for shares | ||
94 | * distribution. | ||
95 | * (default: 10msec) | ||
96 | */ | ||
97 | unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; | ||
98 | |||
92 | static const struct sched_class fair_sched_class; | 99 | static const struct sched_class fair_sched_class; |
93 | 100 | ||
94 | /************************************************************** | 101 | /************************************************************** |
@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | |||
143 | return cfs_rq->tg->cfs_rq[this_cpu]; | 150 | return cfs_rq->tg->cfs_rq[this_cpu]; |
144 | } | 151 | } |
145 | 152 | ||
153 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
154 | { | ||
155 | if (!cfs_rq->on_list) { | ||
156 | /* | ||
157 | * Ensure we either appear before our parent (if already | ||
158 | * enqueued) or force our parent to appear after us when it is | ||
159 | * enqueued. The fact that we always enqueue bottom-up | ||
160 | * reduces this to two cases. | ||
161 | */ | ||
162 | if (cfs_rq->tg->parent && | ||
163 | cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) { | ||
164 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, | ||
165 | &rq_of(cfs_rq)->leaf_cfs_rq_list); | ||
166 | } else { | ||
167 | list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list, | ||
168 | &rq_of(cfs_rq)->leaf_cfs_rq_list); | ||
169 | } | ||
170 | |||
171 | cfs_rq->on_list = 1; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
176 | { | ||
177 | if (cfs_rq->on_list) { | ||
178 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | ||
179 | cfs_rq->on_list = 0; | ||
180 | } | ||
181 | } | ||
182 | |||
146 | /* Iterate through all leaf cfs_rq's on a runqueue */ | 183 | /* Iterate through all leaf cfs_rq's on a runqueue */
147 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | 184 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ |
148 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) | 185 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) |
@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | |||
246 | return &cpu_rq(this_cpu)->cfs; | 283 | return &cpu_rq(this_cpu)->cfs; |
247 | } | 284 | } |
248 | 285 | ||
286 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
287 | { | ||
288 | } | ||
289 | |||
290 | static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
291 | { | ||
292 | } | ||
293 | |||
249 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | 294 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ |
250 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) | 295 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) |
251 | 296 | ||
@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write, | |||
417 | WRT_SYSCTL(sched_min_granularity); | 462 | WRT_SYSCTL(sched_min_granularity); |
418 | WRT_SYSCTL(sched_latency); | 463 | WRT_SYSCTL(sched_latency); |
419 | WRT_SYSCTL(sched_wakeup_granularity); | 464 | WRT_SYSCTL(sched_wakeup_granularity); |
420 | WRT_SYSCTL(sched_shares_ratelimit); | ||
421 | #undef WRT_SYSCTL | 465 | #undef WRT_SYSCTL |
422 | 466 | ||
423 | return 0; | 467 | return 0; |
@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
495 | return calc_delta_fair(sched_slice(cfs_rq, se), se); | 539 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
496 | } | 540 | } |
497 | 541 | ||
542 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); | ||
543 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); | ||
544 | |||
498 | /* | 545 | /* |
499 | * Update the current task's runtime statistics. Skip current tasks that | 546 | * Update the current task's runtime statistics. Skip current tasks that |
500 | * are not in our scheduling class. | 547 | * are not in our scheduling class. |
@@ -514,6 +561,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
514 | 561 | ||
515 | curr->vruntime += delta_exec_weighted; | 562 | curr->vruntime += delta_exec_weighted; |
516 | update_min_vruntime(cfs_rq); | 563 | update_min_vruntime(cfs_rq); |
564 | |||
565 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
566 | cfs_rq->load_unacc_exec_time += delta_exec; | ||
567 | #endif | ||
517 | } | 568 | } |
518 | 569 | ||
519 | static void update_curr(struct cfs_rq *cfs_rq) | 570 | static void update_curr(struct cfs_rq *cfs_rq) |
@@ -633,7 +684,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
633 | list_add(&se->group_node, &cfs_rq->tasks); | 684 | list_add(&se->group_node, &cfs_rq->tasks); |
634 | } | 685 | } |
635 | cfs_rq->nr_running++; | 686 | cfs_rq->nr_running++; |
636 | se->on_rq = 1; | ||
637 | } | 687 | } |
638 | 688 | ||
639 | static void | 689 | static void |
@@ -647,9 +697,140 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
647 | list_del_init(&se->group_node); | 697 | list_del_init(&se->group_node); |
648 | } | 698 | } |
649 | cfs_rq->nr_running--; | 699 | cfs_rq->nr_running--; |
650 | se->on_rq = 0; | ||
651 | } | 700 | } |
652 | 701 | ||
702 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
703 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, | ||
704 | int global_update) | ||
705 | { | ||
706 | struct task_group *tg = cfs_rq->tg; | ||
707 | long load_avg; | ||
708 | |||
709 | load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); | ||
710 | load_avg -= cfs_rq->load_contribution; | ||
711 | |||
712 | if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) { | ||
713 | atomic_add(load_avg, &tg->load_weight); | ||
714 | cfs_rq->load_contribution += load_avg; | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
719 | { | ||
720 | u64 period = sysctl_sched_shares_window; | ||
721 | u64 now, delta; | ||
722 | unsigned long load; | ||
723 | | ||
724 | if (!cfs_rq) | ||
725 | return; | ||
726 | load = cfs_rq->load.weight; | ||
727 | now = rq_of(cfs_rq)->clock; | ||
728 | delta = now - cfs_rq->load_stamp; | ||
729 | |||
730 | /* truncate load history at 4 idle periods */ | ||
731 | if (cfs_rq->load_stamp > cfs_rq->load_last && | ||
732 | now - cfs_rq->load_last > 4 * period) { | ||
733 | cfs_rq->load_period = 0; | ||
734 | cfs_rq->load_avg = 0; | ||
735 | } | ||
736 | |||
737 | cfs_rq->load_stamp = now; | ||
738 | cfs_rq->load_unacc_exec_time = 0; | ||
739 | cfs_rq->load_period += delta; | ||
740 | if (load) { | ||
741 | cfs_rq->load_last = now; | ||
742 | cfs_rq->load_avg += delta * load; | ||
743 | } | ||
744 | |||
745 | /* consider updating load contribution on each fold or truncate */ | ||
746 | if (global_update || cfs_rq->load_period > period | ||
747 | || !cfs_rq->load_period) | ||
748 | update_cfs_rq_load_contribution(cfs_rq, global_update); | ||
749 | |||
750 | while (cfs_rq->load_period > period) { | ||
751 | /* | ||
752 | * Inline assembly required to prevent the compiler | ||
753 | * optimising this loop into a divmod call. | ||
754 | * See __iter_div_u64_rem() for another example of this. | ||
755 | */ | ||
756 | asm("" : "+rm" (cfs_rq->load_period)); | ||
757 | cfs_rq->load_period /= 2; | ||
758 | cfs_rq->load_avg /= 2; | ||
759 | } | ||
760 | |||
761 | if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg) | ||
762 | list_del_leaf_cfs_rq(cfs_rq); | ||
763 | } | ||
764 | |||
765 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | ||
766 | unsigned long weight) | ||
767 | { | ||
768 | if (se->on_rq) { | ||
769 | /* commit outstanding execution time */ | ||
770 | if (cfs_rq->curr == se) | ||
771 | update_curr(cfs_rq); | ||
772 | account_entity_dequeue(cfs_rq, se); | ||
773 | } | ||
774 | |||
775 | update_load_set(&se->load, weight); | ||
776 | |||
777 | if (se->on_rq) | ||
778 | account_entity_enqueue(cfs_rq, se); | ||
779 | } | ||
780 | |||
781 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | ||
782 | { | ||
783 | struct task_group *tg; | ||
784 | struct sched_entity *se; | ||
785 | long load_weight, load, shares; | ||
786 | |||
787 | if (!cfs_rq) | ||
788 | return; | ||
789 | |||
790 | tg = cfs_rq->tg; | ||
791 | se = tg->se[cpu_of(rq_of(cfs_rq))]; | ||
792 | if (!se) | ||
793 | return; | ||
794 | |||
795 | load = cfs_rq->load.weight + weight_delta; | ||
796 | |||
797 | load_weight = atomic_read(&tg->load_weight); | ||
798 | load_weight -= cfs_rq->load_contribution; | ||
799 | load_weight += load; | ||
800 | |||
801 | shares = (tg->shares * load); | ||
802 | if (load_weight) | ||
803 | shares /= load_weight; | ||
804 | |||
805 | if (shares < MIN_SHARES) | ||
806 | shares = MIN_SHARES; | ||
807 | if (shares > tg->shares) | ||
808 | shares = tg->shares; | ||
809 | |||
810 | reweight_entity(cfs_rq_of(se), se, shares); | ||
811 | } | ||
812 | |||
813 | static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
814 | { | ||
815 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
816 | update_cfs_load(cfs_rq, 0); | ||
817 | update_cfs_shares(cfs_rq, 0); | ||
818 | } | ||
819 | } | ||
820 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
821 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
822 | { | ||
823 | } | ||
824 | |||
825 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | ||
826 | { | ||
827 | } | ||
828 | |||
829 | static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
830 | { | ||
831 | } | ||
832 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
833 | |||
653 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | 834 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) |
654 | { | 835 | { |
655 | #ifdef CONFIG_SCHEDSTATS | 836 | #ifdef CONFIG_SCHEDSTATS |
@@ -771,6 +952,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
771 | * Update run-time statistics of the 'current'. | 952 | * Update run-time statistics of the 'current'. |
772 | */ | 953 | */ |
773 | update_curr(cfs_rq); | 954 | update_curr(cfs_rq); |
955 | update_cfs_load(cfs_rq, 0); | ||
956 | update_cfs_shares(cfs_rq, se->load.weight); | ||
774 | account_entity_enqueue(cfs_rq, se); | 957 | account_entity_enqueue(cfs_rq, se); |
775 | 958 | ||
776 | if (flags & ENQUEUE_WAKEUP) { | 959 | if (flags & ENQUEUE_WAKEUP) { |
@@ -782,6 +965,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
782 | check_spread(cfs_rq, se); | 965 | check_spread(cfs_rq, se); |
783 | if (se != cfs_rq->curr) | 966 | if (se != cfs_rq->curr) |
784 | __enqueue_entity(cfs_rq, se); | 967 | __enqueue_entity(cfs_rq, se); |
968 | se->on_rq = 1; | ||
969 | |||
970 | if (cfs_rq->nr_running == 1) | ||
971 | list_add_leaf_cfs_rq(cfs_rq); | ||
785 | } | 972 | } |
786 | 973 | ||
787 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 974 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
@@ -825,8 +1012,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
825 | 1012 | ||
826 | if (se != cfs_rq->curr) | 1013 | if (se != cfs_rq->curr) |
827 | __dequeue_entity(cfs_rq, se); | 1014 | __dequeue_entity(cfs_rq, se); |
1015 | se->on_rq = 0; | ||
1016 | update_cfs_load(cfs_rq, 0); | ||
828 | account_entity_dequeue(cfs_rq, se); | 1017 | account_entity_dequeue(cfs_rq, se); |
829 | update_min_vruntime(cfs_rq); | 1018 | update_min_vruntime(cfs_rq); |
1019 | update_cfs_shares(cfs_rq, 0); | ||
830 | 1020 | ||
831 | /* | 1021 | /* |
832 | * Normalize the entity after updating the min_vruntime because the | 1022 | * Normalize the entity after updating the min_vruntime because the |
@@ -955,6 +1145,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
955 | */ | 1145 | */ |
956 | update_curr(cfs_rq); | 1146 | update_curr(cfs_rq); |
957 | 1147 | ||
1148 | /* | ||
1149 | * Update share accounting for long-running entities. | ||
1150 | */ | ||
1151 | update_entity_shares_tick(cfs_rq); | ||
1152 | |||
958 | #ifdef CONFIG_SCHED_HRTICK | 1153 | #ifdef CONFIG_SCHED_HRTICK |
959 | /* | 1154 | /* |
960 | * queued ticks are scheduled to match the slice, so don't bother | 1155 | * queued ticks are scheduled to match the slice, so don't bother |
@@ -1055,6 +1250,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1055 | flags = ENQUEUE_WAKEUP; | 1250 | flags = ENQUEUE_WAKEUP; |
1056 | } | 1251 | } |
1057 | 1252 | ||
1253 | for_each_sched_entity(se) { | ||
1254 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1255 | |||
1256 | update_cfs_load(cfs_rq, 0); | ||
1257 | update_cfs_shares(cfs_rq, 0); | ||
1258 | } | ||
1259 | |||
1058 | hrtick_update(rq); | 1260 | hrtick_update(rq); |
1059 | } | 1261 | } |
1060 | 1262 | ||
@@ -1071,12 +1273,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1071 | for_each_sched_entity(se) { | 1273 | for_each_sched_entity(se) { |
1072 | cfs_rq = cfs_rq_of(se); | 1274 | cfs_rq = cfs_rq_of(se); |
1073 | dequeue_entity(cfs_rq, se, flags); | 1275 | dequeue_entity(cfs_rq, se, flags); |
1276 | |||
1074 | /* Don't dequeue parent if it has other entities besides us */ | 1277 | /* Don't dequeue parent if it has other entities besides us */ |
1075 | if (cfs_rq->load.weight) | 1278 | if (cfs_rq->load.weight) |
1076 | break; | 1279 | break; |
1077 | flags |= DEQUEUE_SLEEP; | 1280 | flags |= DEQUEUE_SLEEP; |
1078 | } | 1281 | } |
1079 | 1282 | ||
1283 | for_each_sched_entity(se) { | ||
1284 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1285 | |||
1286 | update_cfs_load(cfs_rq, 0); | ||
1287 | update_cfs_shares(cfs_rq, 0); | ||
1288 | } | ||
1289 | |||
1080 | hrtick_update(rq); | 1290 | hrtick_update(rq); |
1081 | } | 1291 | } |
1082 | 1292 | ||
@@ -1143,51 +1353,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) | |||
1143 | * Adding load to a group doesn't make a group heavier, but can cause movement | 1353 | * Adding load to a group doesn't make a group heavier, but can cause movement |
1144 | * of group shares between cpus. Assuming the shares were perfectly aligned one | 1354 | * of group shares between cpus. Assuming the shares were perfectly aligned one |
1145 | * can calculate the shift in shares. | 1355 | * can calculate the shift in shares. |
1146 | * | ||
1147 | * The problem is that perfectly aligning the shares is rather expensive, hence | ||
1148 | * we try to avoid doing that too often - see update_shares(), which ratelimits | ||
1149 | * this change. | ||
1150 | * | ||
1151 | * We compensate this by not only taking the current delta into account, but | ||
1152 | * also considering the delta between when the shares were last adjusted and | ||
1153 | * now. | ||
1154 | * | ||
1155 | * We still saw a performance dip, some tracing learned us that between | ||
1156 | * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased | ||
1157 | * significantly. Therefore try to bias the error in direction of failing | ||
1158 | * the affine wakeup. | ||
1159 | * | ||
1160 | */ | 1356 | */ |
1161 | static long effective_load(struct task_group *tg, int cpu, | 1357 | static long effective_load(struct task_group *tg, int cpu, long wl, long wg) |
1162 | long wl, long wg) | ||
1163 | { | 1358 | { |
1164 | struct sched_entity *se = tg->se[cpu]; | 1359 | struct sched_entity *se = tg->se[cpu]; |
1165 | 1360 | ||
1166 | if (!tg->parent) | 1361 | if (!tg->parent) |
1167 | return wl; | 1362 | return wl; |
1168 | 1363 | ||
1169 | /* | ||
1170 | * By not taking the decrease of shares on the other cpu into | ||
1171 | * account our error leans towards reducing the affine wakeups. | ||
1172 | */ | ||
1173 | if (!wl && sched_feat(ASYM_EFF_LOAD)) | ||
1174 | return wl; | ||
1175 | |||
1176 | for_each_sched_entity(se) { | 1364 | for_each_sched_entity(se) { |
1177 | long S, rw, s, a, b; | 1365 | long S, rw, s, a, b; |
1178 | long more_w; | ||
1179 | |||
1180 | /* | ||
1181 | * Instead of using this increment, also add the difference | ||
1182 | * between when the shares were last updated and now. | ||
1183 | */ | ||
1184 | more_w = se->my_q->load.weight - se->my_q->rq_weight; | ||
1185 | wl += more_w; | ||
1186 | wg += more_w; | ||
1187 | 1366 | ||
1188 | S = se->my_q->tg->shares; | 1367 | S = se->my_q->tg->shares; |
1189 | s = se->my_q->shares; | 1368 | s = se->load.weight; |
1190 | rw = se->my_q->rq_weight; | 1369 | rw = se->my_q->load.weight; |
1191 | 1370 | ||
1192 | a = S*(rw + wl); | 1371 | a = S*(rw + wl); |
1193 | b = S*rw + s*wg; | 1372 | b = S*rw + s*wg; |
@@ -1508,23 +1687,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
1508 | sd = tmp; | 1687 | sd = tmp; |
1509 | } | 1688 | } |
1510 | 1689 | ||
1511 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1512 | if (sched_feat(LB_SHARES_UPDATE)) { | ||
1513 | /* | ||
1514 | * Pick the largest domain to update shares over | ||
1515 | */ | ||
1516 | tmp = sd; | ||
1517 | if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) | ||
1518 | tmp = affine_sd; | ||
1519 | |||
1520 | if (tmp) { | ||
1521 | raw_spin_unlock(&rq->lock); | ||
1522 | update_shares(tmp); | ||
1523 | raw_spin_lock(&rq->lock); | ||
1524 | } | ||
1525 | } | ||
1526 | #endif | ||
1527 | |||
1528 | if (affine_sd) { | 1690 | if (affine_sd) { |
1529 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) | 1691 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) |
1530 | return select_idle_sibling(p, cpu); | 1692 | return select_idle_sibling(p, cpu); |
@@ -1909,6 +2071,48 @@ out: | |||
1909 | } | 2071 | } |
1910 | 2072 | ||
1911 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2073 | #ifdef CONFIG_FAIR_GROUP_SCHED |
2074 | /* | ||
2075 | * update tg->load_weight by folding this cpu's load_avg | ||
2076 | */ | ||
2077 | static int update_shares_cpu(struct task_group *tg, int cpu) | ||
2078 | { | ||
2079 | struct cfs_rq *cfs_rq; | ||
2080 | unsigned long flags; | ||
2081 | struct rq *rq; | ||
2082 | |||
2083 | if (!tg->se[cpu]) | ||
2084 | return 0; | ||
2085 | |||
2086 | rq = cpu_rq(cpu); | ||
2087 | cfs_rq = tg->cfs_rq[cpu]; | ||
2088 | |||
2089 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2090 | |||
2091 | update_rq_clock(rq); | ||
2092 | update_cfs_load(cfs_rq, 1); | ||
2093 | |||
2094 | /* | ||
2095 | * We need to update shares after updating tg->load_weight in | ||
2096 | * order to adjust the weight of groups with long running tasks. | ||
2097 | */ | ||
2098 | update_cfs_shares(cfs_rq, 0); | ||
2099 | |||
2100 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
2101 | |||
2102 | return 0; | ||
2103 | } | ||
2104 | |||
2105 | static void update_shares(int cpu) | ||
2106 | { | ||
2107 | struct cfs_rq *cfs_rq; | ||
2108 | struct rq *rq = cpu_rq(cpu); | ||
2109 | |||
2110 | rcu_read_lock(); | ||
2111 | for_each_leaf_cfs_rq(rq, cfs_rq) | ||
2112 | update_shares_cpu(cfs_rq->tg, cpu); | ||
2113 | rcu_read_unlock(); | ||
2114 | } | ||
2115 | |||
1912 | static unsigned long | 2116 | static unsigned long |
1913 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2117 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1914 | unsigned long max_load_move, | 2118 | unsigned long max_load_move, |
@@ -1956,6 +2160,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1956 | return max_load_move - rem_load_move; | 2160 | return max_load_move - rem_load_move; |
1957 | } | 2161 | } |
1958 | #else | 2162 | #else |
2163 | static inline void update_shares(int cpu) | ||
2164 | { | ||
2165 | } | ||
2166 | |||
1959 | static unsigned long | 2167 | static unsigned long |
1960 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2168 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1961 | unsigned long max_load_move, | 2169 | unsigned long max_load_move, |
@@ -3032,7 +3240,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3032 | schedstat_inc(sd, lb_count[idle]); | 3240 | schedstat_inc(sd, lb_count[idle]); |
3033 | 3241 | ||
3034 | redo: | 3242 | redo: |
3035 | update_shares(sd); | ||
3036 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3243 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
3037 | cpus, balance); | 3244 | cpus, balance); |
3038 | 3245 | ||
@@ -3174,8 +3381,6 @@ out_one_pinned: | |||
3174 | else | 3381 | else |
3175 | ld_moved = 0; | 3382 | ld_moved = 0; |
3176 | out: | 3383 | out: |
3177 | if (ld_moved) | ||
3178 | update_shares(sd); | ||
3179 | return ld_moved; | 3384 | return ld_moved; |
3180 | } | 3385 | } |
3181 | 3386 | ||
@@ -3199,6 +3404,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3199 | */ | 3404 | */ |
3200 | raw_spin_unlock(&this_rq->lock); | 3405 | raw_spin_unlock(&this_rq->lock); |
3201 | 3406 | ||
3407 | update_shares(this_cpu); | ||
3202 | for_each_domain(this_cpu, sd) { | 3408 | for_each_domain(this_cpu, sd) { |
3203 | unsigned long interval; | 3409 | unsigned long interval; |
3204 | int balance = 1; | 3410 | int balance = 1; |
@@ -3569,6 +3775,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3569 | int update_next_balance = 0; | 3775 | int update_next_balance = 0; |
3570 | int need_serialize; | 3776 | int need_serialize; |
3571 | 3777 | ||
3778 | update_shares(cpu); | ||
3779 | |||
3572 | for_each_domain(cpu, sd) { | 3780 | for_each_domain(cpu, sd) { |
3573 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3781 | if (!(sd->flags & SD_LOAD_BALANCE)) |
3574 | continue; | 3782 | continue; |
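
The heart of the new group-shares math is the decayed window kept by update_cfs_load(): every update folds delta*weight into load_avg, and once the accumulated period exceeds sysctl_sched_shares_window both terms are halved, so history older than one window counts geometrically less. The fold in standalone form:

    /* Fold one observation into the decayed load window (sketch). */
    static void fold_load(unsigned long long *load_avg,
                          unsigned long long *load_period,
                          unsigned long long delta,   /* ns since last update */
                          unsigned long load,         /* current queue weight */
                          unsigned long long window)  /* sysctl_sched_shares_window */
    {
            *load_period += delta;
            if (load)
                    *load_avg += delta * load;

            /* Each halving decays older contributions by a factor of two. */
            while (*load_period > window) {
                    *load_period /= 2;
                    *load_avg /= 2;
            }
    }

update_cfs_shares() then sizes this cpu's group entity as tg->shares scaled by the local queue's fraction of the group-wide load, clamped to the range [MIN_SHARES, tg->shares].
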
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 185f920ec1a2..68e69acc29b9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0) | |||
52 | SCHED_FEAT(HRTICK, 0) | 52 | SCHED_FEAT(HRTICK, 0) |
53 | SCHED_FEAT(DOUBLE_TICK, 0) | 53 | SCHED_FEAT(DOUBLE_TICK, 0) |
54 | SCHED_FEAT(LB_BIAS, 1) | 54 | SCHED_FEAT(LB_BIAS, 1) |
55 | SCHED_FEAT(LB_SHARES_UPDATE, 1) | ||
56 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | ||
57 | 55 | ||
58 | /* | 56 | /* |
59 | * Spin-wait on mutex acquisition when the mutex owner is running on | 57 | * Spin-wait on mutex acquisition when the mutex owner is running on |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index bea7d79f7e9c..c914ec747ca6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) | |||
183 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); | 183 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); |
184 | } | 184 | } |
185 | 185 | ||
186 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) | ||
187 | { | ||
188 | list_add_rcu(&rt_rq->leaf_rt_rq_list, | ||
189 | &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); | ||
190 | } | ||
191 | |||
192 | static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) | ||
193 | { | ||
194 | list_del_rcu(&rt_rq->leaf_rt_rq_list); | ||
195 | } | ||
196 | |||
186 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 197 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
187 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) | 198 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) |
188 | 199 | ||
@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) | |||
276 | return ktime_to_ns(def_rt_bandwidth.rt_period); | 287 | return ktime_to_ns(def_rt_bandwidth.rt_period); |
277 | } | 288 | } |
278 | 289 | ||
290 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) | ||
291 | { | ||
292 | } | ||
293 | |||
294 | static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) | ||
295 | { | ||
296 | } | ||
297 | |||
279 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 298 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
280 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) | 299 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) |
281 | 300 | ||
@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) | |||
825 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 844 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
826 | return; | 845 | return; |
827 | 846 | ||
847 | if (!rt_rq->rt_nr_running) | ||
848 | list_add_leaf_rt_rq(rt_rq); | ||
849 | |||
828 | if (head) | 850 | if (head) |
829 | list_add(&rt_se->run_list, queue); | 851 | list_add(&rt_se->run_list, queue); |
830 | else | 852 | else |
@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) | |||
844 | __clear_bit(rt_se_prio(rt_se), array->bitmap); | 866 | __clear_bit(rt_se_prio(rt_se), array->bitmap); |
845 | 867 | ||
846 | dec_rt_tasks(rt_se, rt_rq); | 868 | dec_rt_tasks(rt_se, rt_rq); |
869 | if (!rt_rq->rt_nr_running) | ||
870 | list_del_leaf_rt_rq(rt_rq); | ||
847 | } | 871 | } |
848 | 872 | ||
849 | /* | 873 | /* |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 18f4be0d5fe0..d4d918a91881 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
853 | cpumask_any(cpu_online_mask)); | 853 | cpumask_any(cpu_online_mask)); |
854 | case CPU_DEAD: | 854 | case CPU_DEAD: |
855 | case CPU_DEAD_FROZEN: { | 855 | case CPU_DEAD_FROZEN: { |
856 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 856 | static struct sched_param param = { |
857 | .sched_priority = MAX_RT_PRIO-1 | ||
858 | }; | ||
857 | 859 | ||
858 | p = per_cpu(ksoftirqd, hotcpu); | 860 | p = per_cpu(ksoftirqd, hotcpu); |
859 | per_cpu(ksoftirqd, hotcpu) = NULL; | 861 | per_cpu(ksoftirqd, hotcpu) = NULL; |
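
Making the initialized sched_param static keeps it out of the notifier's stack frame; the scheduler setter only reads the structure, so one shared copy is enough. The shape of such a call, with p standing in for the per-cpu ksoftirqd thread:

    static struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

    sched_setscheduler(p, SCHED_FIFO, &param);  /* boost p to top RT priority */
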
diff --git a/kernel/srcu.c b/kernel/srcu.c index c71e07500536..98d8c1e80edb 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/rcupdate.h> | 31 | #include <linux/rcupdate.h> |
32 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
33 | #include <linux/smp.h> | 33 | #include <linux/smp.h> |
34 | #include <linux/delay.h> | ||
34 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
35 | 36 | ||
36 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 37 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
@@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
203 | * all srcu_read_lock() calls using the old counters have completed. | 204 | * all srcu_read_lock() calls using the old counters have completed. |
204 | * Their corresponding critical sections might well be still | 205 | * Their corresponding critical sections might well be still |
205 | * executing, but the srcu_read_lock() primitives themselves | 206 | * executing, but the srcu_read_lock() primitives themselves |
206 | * will have finished executing. | 207 | * will have finished executing. We initially give readers |
208 | * an arbitrarily chosen 10 microseconds to get out of their | ||
209 | * SRCU read-side critical sections, then loop waiting 1/HZ | ||
210 | * seconds per iteration. | ||
207 | */ | 211 | */ |
208 | 212 | ||
213 | if (srcu_readers_active_idx(sp, idx)) | ||
214 | udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); | ||
209 | while (srcu_readers_active_idx(sp, idx)) | 215 | while (srcu_readers_active_idx(sp, idx)) |
210 | schedule_timeout_interruptible(1); | 216 | schedule_timeout_interruptible(1); |
211 | 217 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 7f5a0cd296a9..2745dcdb6c6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid) | |||
1080 | err = session; | 1080 | err = session; |
1081 | out: | 1081 | out: |
1082 | write_unlock_irq(&tasklist_lock); | 1082 | write_unlock_irq(&tasklist_lock); |
1083 | if (err > 0) | 1083 | if (err > 0) { |
1084 | proc_sid_connector(group_leader); | 1084 | proc_sid_connector(group_leader); |
1085 | sched_autogroup_create_attach(group_leader); | ||
1086 | } | ||
1085 | return err; | 1087 | return err; |
1086 | } | 1088 | } |
1087 | 1089 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 46404414d8a7..ae5cbb1e3ced 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */ | |||
259 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 259 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
260 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; | 260 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; |
261 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; | 261 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; |
262 | static int min_sched_shares_ratelimit = 100000; /* 100 usec */ | ||
263 | static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ | ||
264 | #endif | 262 | #endif |
265 | 263 | ||
266 | #ifdef CONFIG_COMPACTION | 264 | #ifdef CONFIG_COMPACTION |
@@ -305,15 +303,6 @@ static struct ctl_table kern_table[] = { | |||
305 | .extra2 = &max_wakeup_granularity_ns, | 303 | .extra2 = &max_wakeup_granularity_ns, |
306 | }, | 304 | }, |
307 | { | 305 | { |
308 | .procname = "sched_shares_ratelimit", | ||
309 | .data = &sysctl_sched_shares_ratelimit, | ||
310 | .maxlen = sizeof(unsigned int), | ||
311 | .mode = 0644, | ||
312 | .proc_handler = sched_proc_update_handler, | ||
313 | .extra1 = &min_sched_shares_ratelimit, | ||
314 | .extra2 = &max_sched_shares_ratelimit, | ||
315 | }, | ||
316 | { | ||
317 | .procname = "sched_tunable_scaling", | 306 | .procname = "sched_tunable_scaling", |
318 | .data = &sysctl_sched_tunable_scaling, | 307 | .data = &sysctl_sched_tunable_scaling, |
319 | .maxlen = sizeof(enum sched_tunable_scaling), | 308 | .maxlen = sizeof(enum sched_tunable_scaling), |
@@ -323,14 +312,6 @@ static struct ctl_table kern_table[] = { | |||
323 | .extra2 = &max_sched_tunable_scaling, | 312 | .extra2 = &max_sched_tunable_scaling, |
324 | }, | 313 | }, |
325 | { | 314 | { |
326 | .procname = "sched_shares_thresh", | ||
327 | .data = &sysctl_sched_shares_thresh, | ||
328 | .maxlen = sizeof(unsigned int), | ||
329 | .mode = 0644, | ||
330 | .proc_handler = proc_dointvec_minmax, | ||
331 | .extra1 = &zero, | ||
332 | }, | ||
333 | { | ||
334 | .procname = "sched_migration_cost", | 315 | .procname = "sched_migration_cost", |
335 | .data = &sysctl_sched_migration_cost, | 316 | .data = &sysctl_sched_migration_cost, |
336 | .maxlen = sizeof(unsigned int), | 317 | .maxlen = sizeof(unsigned int), |
@@ -352,6 +333,13 @@ static struct ctl_table kern_table[] = { | |||
352 | .proc_handler = proc_dointvec, | 333 | .proc_handler = proc_dointvec, |
353 | }, | 334 | }, |
354 | { | 335 | { |
336 | .procname = "sched_shares_window", | ||
337 | .data = &sysctl_sched_shares_window, | ||
338 | .maxlen = sizeof(unsigned int), | ||
339 | .mode = 0644, | ||
340 | .proc_handler = proc_dointvec, | ||
341 | }, | ||
342 | { | ||
355 | .procname = "timer_migration", | 343 | .procname = "timer_migration", |
356 | .data = &sysctl_timer_migration, | 344 | .data = &sysctl_timer_migration, |
357 | .maxlen = sizeof(unsigned int), | 345 | .maxlen = sizeof(unsigned int), |
@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = { | |||
382 | .mode = 0644, | 370 | .mode = 0644, |
383 | .proc_handler = proc_dointvec, | 371 | .proc_handler = proc_dointvec, |
384 | }, | 372 | }, |
373 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
374 | { | ||
375 | .procname = "sched_autogroup_enabled", | ||
376 | .data = &sysctl_sched_autogroup_enabled, | ||
377 | .maxlen = sizeof(unsigned int), | ||
378 | .mode = 0644, | ||
379 | .proc_handler = proc_dointvec, | ||
380 | .extra1 = &zero, | ||
381 | .extra2 = &one, | ||
382 | }, | ||
383 | #endif | ||
385 | #ifdef CONFIG_PROVE_LOCKING | 384 | #ifdef CONFIG_PROVE_LOCKING |
386 | { | 385 | { |
387 | .procname = "prove_locking", | 386 | .procname = "prove_locking", |
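
Simple sysctl knobs all follow the same ctl_table shape; when the extra1/extra2 bounds must actually be enforced, the handler to pair them with is proc_dointvec_minmax. A hypothetical boolean knob of that shape:

    static int zero;
    static int one = 1;
    static unsigned int my_feature_enabled = 1;   /* hypothetical knob */

    static struct ctl_table my_table[] = {
            {
                    .procname     = "my_feature_enabled",
                    .data         = &my_feature_enabled,
                    .maxlen       = sizeof(unsigned int),
                    .mode         = 0644,
                    .proc_handler = proc_dointvec_minmax, /* clamps to [0, 1] */
                    .extra1       = &zero,
                    .extra2       = &one,
            },
            { }                                   /* sentinel */
    };
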
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index ac38fbb176cc..a9ae369925ce 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/math64.h> | 23 | #include <linux/math64.h> |
24 | #include <linux/kernel.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * fixed point arithmetic scale factor for skew | 27 | * fixed point arithmetic scale factor for skew |
@@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync, | |||
57 | int index; | 58 | int index; |
58 | int num_samples = sync->num_samples; | 59 | int num_samples = sync->num_samples; |
59 | 60 | ||
60 | if (num_samples > sizeof(buffer)/sizeof(buffer[0])) { | 61 | if (num_samples > ARRAY_SIZE(buffer)) { |
61 | samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); | 62 | samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); |
62 | if (!samples) { | 63 | if (!samples) { |
63 | samples = buffer; | 64 | samples = buffer; |
64 | num_samples = sizeof(buffer)/sizeof(buffer[0]); | 65 | num_samples = ARRAY_SIZE(buffer); |
65 | } | 66 | } |
66 | } else { | 67 | } else { |
67 | samples = buffer; | 68 | samples = buffer; |
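
ARRAY_SIZE() from <linux/kernel.h> replaces the open-coded element count; the kernel macro also refuses to compile when handed a pointer rather than a true array. Its simplified equivalent is:

    /* Simplified form; the kernel version adds a build-time check
     * that the argument is an array, not a decayed pointer. */
    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
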
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 49010d822f72..5bb86da82003 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -32,6 +32,8 @@ struct timekeeper { | |||
32 | cycle_t cycle_interval; | 32 | cycle_t cycle_interval; |
33 | /* Number of clock shifted nano seconds in one NTP interval. */ | 33 | /* Number of clock shifted nano seconds in one NTP interval. */ |
34 | u64 xtime_interval; | 34 | u64 xtime_interval; |
35 | /* shifted nano seconds left over when rounding cycle_interval */ | ||
36 | s64 xtime_remainder; | ||
35 | /* Raw nano seconds accumulated per NTP interval. */ | 37 | /* Raw nano seconds accumulated per NTP interval. */ |
36 | u32 raw_interval; | 38 | u32 raw_interval; |
37 | 39 | ||
@@ -62,7 +64,7 @@ struct timekeeper timekeeper; | |||
62 | static void timekeeper_setup_internals(struct clocksource *clock) | 64 | static void timekeeper_setup_internals(struct clocksource *clock) |
63 | { | 65 | { |
64 | cycle_t interval; | 66 | cycle_t interval; |
65 | u64 tmp; | 67 | u64 tmp, ntpinterval; |
66 | 68 | ||
67 | timekeeper.clock = clock; | 69 | timekeeper.clock = clock; |
68 | clock->cycle_last = clock->read(clock); | 70 | clock->cycle_last = clock->read(clock); |
@@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) | |||
70 | /* Do the ns -> cycle conversion first, using original mult */ | 72 | /* Do the ns -> cycle conversion first, using original mult */ |
71 | tmp = NTP_INTERVAL_LENGTH; | 73 | tmp = NTP_INTERVAL_LENGTH; |
72 | tmp <<= clock->shift; | 74 | tmp <<= clock->shift; |
75 | ntpinterval = tmp; | ||
73 | tmp += clock->mult/2; | 76 | tmp += clock->mult/2; |
74 | do_div(tmp, clock->mult); | 77 | do_div(tmp, clock->mult); |
75 | if (tmp == 0) | 78 | if (tmp == 0) |
@@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) | |||
80 | 83 | ||
81 | /* Go back from cycles -> shifted ns */ | 84 | /* Go back from cycles -> shifted ns */ |
82 | timekeeper.xtime_interval = (u64) interval * clock->mult; | 85 | timekeeper.xtime_interval = (u64) interval * clock->mult; |
86 | timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; | ||
83 | timekeeper.raw_interval = | 87 | timekeeper.raw_interval = |
84 | ((u64) interval * clock->mult) >> clock->shift; | 88 | ((u64) interval * clock->mult) >> clock->shift; |
85 | 89 | ||
@@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
719 | 723 | ||
720 | /* Accumulate error between NTP and clock interval */ | 724 | /* Accumulate error between NTP and clock interval */ |
721 | timekeeper.ntp_error += tick_length << shift; | 725 | timekeeper.ntp_error += tick_length << shift; |
722 | timekeeper.ntp_error -= timekeeper.xtime_interval << | 726 | timekeeper.ntp_error -= |
727 | (timekeeper.xtime_interval + timekeeper.xtime_remainder) << | ||
723 | (timekeeper.ntp_error_shift + shift); | 728 | (timekeeper.ntp_error_shift + shift); |
724 | 729 | ||
725 | return offset; | 730 | return offset; |
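
timekeeper_setup_internals() rounds the ideal NTP interval to a whole number of clocksource cycles, and xtime_remainder captures the shifted-ns rounding error so that logarithmic_accumulation() charges it to ntp_error instead of letting it build up as silent drift. A worked example with made-up mult/shift values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Hypothetical clocksource parameters, for illustration only. */
            uint64_t ntp_len = 999999ULL;          /* ideal ns per NTP interval */
            uint32_t shift = 20, mult = 4194304;

            uint64_t ntpinterval = ntp_len << shift;             /* ideal, shifted ns */
            uint64_t interval = (ntpinterval + mult / 2) / mult; /* rounded cycles */
            uint64_t xtime_interval = interval * (uint64_t)mult; /* realized, shifted ns */
            int64_t remainder = (int64_t)(ntpinterval - xtime_interval);

            /* Negative here: the round-to-nearest overshot the ideal. */
            printf("xtime_remainder = %lld shifted ns\n", (long long)remainder);
            return 0;
    }
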
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ab8f5e33fa92..32a19f9397fc 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, | |||
79 | { | 79 | { |
80 | struct hrtimer *timer, tmp; | 80 | struct hrtimer *timer, tmp; |
81 | unsigned long next = 0, i; | 81 | unsigned long next = 0, i; |
82 | struct rb_node *curr; | 82 | struct timerqueue_node *curr; |
83 | unsigned long flags; | 83 | unsigned long flags; |
84 | 84 | ||
85 | next_one: | 85 | next_one: |
86 | i = 0; | 86 | i = 0; |
87 | raw_spin_lock_irqsave(&base->cpu_base->lock, flags); | 87 | raw_spin_lock_irqsave(&base->cpu_base->lock, flags); |
88 | 88 | ||
89 | curr = base->first; | 89 | curr = timerqueue_getnext(&base->active); |
90 | /* | 90 | /* |
91 | * Crude but we have to do this O(N*N) thing, because | 91 | * Crude but we have to do this O(N*N) thing, because |
92 | * we have to unlock the base when printing: | 92 | * we have to unlock the base when printing: |
93 | */ | 93 | */ |
94 | while (curr && i < next) { | 94 | while (curr && i < next) { |
95 | curr = rb_next(curr); | 95 | curr = timerqueue_iterate_next(curr); |
96 | i++; | 96 | i++; |
97 | } | 97 | } |
98 | 98 | ||
99 | if (curr) { | 99 | if (curr) { |
100 | 100 | ||
101 | timer = rb_entry(curr, struct hrtimer, node); | 101 | timer = container_of(curr, struct hrtimer, node); |
102 | tmp = *timer; | 102 | tmp = *timer; |
103 | raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); | 103 | raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); |
104 | 104 | ||
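
With hrtimers now held in a timerqueue, the walk goes through timerqueue_getnext() and timerqueue_iterate_next(), and the enclosing hrtimer is recovered with container_of(), of which rb_entry() was merely a specialization. A simplified definition of the idiom:

    #include <stddef.h>

    /* Recover the containing structure from a pointer to one of its
     * members; rb_entry(ptr, type, member) expands to exactly this. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))
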
diff --git a/kernel/timer.c b/kernel/timer.c index 353b9227c2ec..43ca9936f2d0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; | |||
88 | EXPORT_SYMBOL(boot_tvec_bases); | 88 | EXPORT_SYMBOL(boot_tvec_bases); |
89 | static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; | 89 | static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; |
90 | 90 | ||
91 | /* | ||
92 | * Note that all tvec_bases are 2 byte aligned and lower bit of | ||
93 | * base in timer_list is guaranteed to be zero. Use the LSB to | ||
94 | * indicate whether the timer is deferrable. | ||
95 | * | ||
96 | * A deferrable timer will work normally when the system is busy, but | ||
97 | * will not cause a CPU to come out of idle just to service it; instead, | ||
98 | * the timer will be serviced when the CPU eventually wakes up with a | ||
99 | * subsequent non-deferrable timer. | ||
100 | */ | ||
101 | #define TBASE_DEFERRABLE_FLAG (0x1) | ||
102 | |||
103 | /* Functions below help us manage 'deferrable' flag */ | 91 | /* Functions below help us manage 'deferrable' flag */ |
104 | static inline unsigned int tbase_get_deferrable(struct tvec_base *base) | 92 | static inline unsigned int tbase_get_deferrable(struct tvec_base *base) |
105 | { | 93 | { |
@@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) | |||
113 | 101 | ||
114 | static inline void timer_set_deferrable(struct timer_list *timer) | 102 | static inline void timer_set_deferrable(struct timer_list *timer) |
115 | { | 103 | { |
116 | timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | | 104 | timer->base = TBASE_MAKE_DEFERRED(timer->base); |
117 | TBASE_DEFERRABLE_FLAG)); | ||
118 | } | 105 | } |
119 | 106 | ||
120 | static inline void | 107 | static inline void |
@@ -343,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) | |||
343 | } | 330 | } |
344 | EXPORT_SYMBOL_GPL(set_timer_slack); | 331 | EXPORT_SYMBOL_GPL(set_timer_slack); |
345 | 332 | ||
346 | |||
347 | static inline void set_running_timer(struct tvec_base *base, | ||
348 | struct timer_list *timer) | ||
349 | { | ||
350 | #ifdef CONFIG_SMP | ||
351 | base->running_timer = timer; | ||
352 | #endif | ||
353 | } | ||
354 | |||
355 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) | 333 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) |
356 | { | 334 | { |
357 | unsigned long expires = timer->expires; | 335 | unsigned long expires = timer->expires; |
@@ -936,15 +914,12 @@ int del_timer(struct timer_list *timer) | |||
936 | } | 914 | } |
937 | EXPORT_SYMBOL(del_timer); | 915 | EXPORT_SYMBOL(del_timer); |
938 | 916 | ||
939 | #ifdef CONFIG_SMP | ||
940 | /** | 917 | /** |
941 | * try_to_del_timer_sync - Try to deactivate a timer | 918 | * try_to_del_timer_sync - Try to deactivate a timer |
942 | * @timer: timer to del | 919 | * @timer: timer to del |
943 | * | 920 | * |
944 | * This function tries to deactivate a timer. Upon successful (ret >= 0) | 921 | * This function tries to deactivate a timer. Upon successful (ret >= 0) |
945 | * exit the timer is not queued and the handler is not running on any CPU. | 922 | * exit the timer is not queued and the handler is not running on any CPU. |
946 | * | ||
947 | * It must not be called from interrupt contexts. | ||
948 | */ | 923 | */ |
949 | int try_to_del_timer_sync(struct timer_list *timer) | 924 | int try_to_del_timer_sync(struct timer_list *timer) |
950 | { | 925 | { |
@@ -973,6 +948,7 @@ out: | |||
973 | } | 948 | } |
974 | EXPORT_SYMBOL(try_to_del_timer_sync); | 949 | EXPORT_SYMBOL(try_to_del_timer_sync); |
975 | 950 | ||
951 | #ifdef CONFIG_SMP | ||
976 | /** | 952 | /** |
977 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 953 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
978 | * @timer: the timer to be deactivated | 954 | * @timer: the timer to be deactivated |
@@ -983,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
983 | * | 959 | * |
984 | * Synchronization rules: Callers must prevent restarting of the timer, | 960 | * Synchronization rules: Callers must prevent restarting of the timer, |
985 | * otherwise this function is meaningless. It must not be called from | 961 | * otherwise this function is meaningless. It must not be called from |
986 | * interrupt contexts. The caller must not hold locks which would prevent | 962 | * hardirq contexts. The caller must not hold locks which would prevent |
987 | * completion of the timer's handler. The timer's handler must not call | 963 | * completion of the timer's handler. The timer's handler must not call |
988 | * add_timer_on(). Upon exit the timer is not queued and the handler is | 964 | * add_timer_on(). Upon exit the timer is not queued and the handler is |
989 | * not running on any CPU. | 965 | * not running on any CPU. |
@@ -993,14 +969,16 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
993 | int del_timer_sync(struct timer_list *timer) | 969 | int del_timer_sync(struct timer_list *timer) |
994 | { | 970 | { |
995 | #ifdef CONFIG_LOCKDEP | 971 | #ifdef CONFIG_LOCKDEP |
996 | unsigned long flags; | 972 | local_bh_disable(); |
997 | |||
998 | local_irq_save(flags); | ||
999 | lock_map_acquire(&timer->lockdep_map); | 973 | lock_map_acquire(&timer->lockdep_map); |
1000 | lock_map_release(&timer->lockdep_map); | 974 | lock_map_release(&timer->lockdep_map); |
1001 | local_irq_restore(flags); | 975 | local_bh_enable(); |
1002 | #endif | 976 | #endif |
1003 | 977 | /* | |
978 | * don't use it in hardirq context, because it | ||
979 | * could lead to deadlock. | ||
980 | */ | ||
981 | WARN_ON(in_irq()); | ||
1004 | for (;;) { | 982 | for (;;) { |
1005 | int ret = try_to_del_timer_sync(timer); | 983 | int ret = try_to_del_timer_sync(timer); |
1006 | if (ret >= 0) | 984 | if (ret >= 0) |
@@ -1111,7 +1089,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
1111 | 1089 | ||
1112 | timer_stats_account_timer(timer); | 1090 | timer_stats_account_timer(timer); |
1113 | 1091 | ||
1114 | set_running_timer(base, timer); | 1092 | base->running_timer = timer; |
1115 | detach_timer(timer, 1); | 1093 | detach_timer(timer, 1); |
1116 | 1094 | ||
1117 | spin_unlock_irq(&base->lock); | 1095 | spin_unlock_irq(&base->lock); |
@@ -1119,7 +1097,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
1119 | spin_lock_irq(&base->lock); | 1097 | spin_lock_irq(&base->lock); |
1120 | } | 1098 | } |
1121 | } | 1099 | } |
1122 | set_running_timer(base, NULL); | 1100 | base->running_timer = NULL; |
1123 | spin_unlock_irq(&base->lock); | 1101 | spin_unlock_irq(&base->lock); |
1124 | } | 1102 | } |
1125 | 1103 | ||
@@ -1249,7 +1227,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, | |||
1249 | */ | 1227 | */ |
1250 | unsigned long get_next_timer_interrupt(unsigned long now) | 1228 | unsigned long get_next_timer_interrupt(unsigned long now) |
1251 | { | 1229 | { |
1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1230 | struct tvec_base *base = __this_cpu_read(tvec_bases); |
1253 | unsigned long expires; | 1231 | unsigned long expires; |
1254 | 1232 | ||
1255 | /* | 1233 | /* |
@@ -1298,7 +1276,7 @@ void update_process_times(int user_tick) | |||
1298 | */ | 1276 | */ |
1299 | static void run_timer_softirq(struct softirq_action *h) | 1277 | static void run_timer_softirq(struct softirq_action *h) |
1300 | { | 1278 | { |
1301 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1279 | struct tvec_base *base = __this_cpu_read(tvec_bases); |
1302 | 1280 | ||
1303 | hrtimer_run_pending(); | 1281 | hrtimer_run_pending(); |
1304 | 1282 | ||
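
The two accessor conversions in this file are mechanical: __get_cpu_var() computes the address of this CPU's slot and then dereferences it, while __this_cpu_read() expresses the pure read directly and, on architectures with segment-based per-cpu addressing such as x86, can compile down to a single load. A minimal sketch of the two forms (semantics as I understand them; both return the same pointer):

	static DEFINE_PER_CPU(struct tvec_base *, tvec_bases);

	static struct tvec_base *base_old_style(void)
	{
		return __get_cpu_var(tvec_bases);	/* addr-of slot, then load */
	}

	static struct tvec_base *base_new_style(void)
	{
		return __this_cpu_read(tvec_bases);	/* direct per-cpu read */
	}
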
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 53f338190b26..761c510a06c5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o | |||
52 | endif | 52 | endif |
53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
55 | obj-$(CONFIG_EVENT_TRACING) += power-traces.o | 55 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o |
56 | ifeq ($(CONFIG_TRACING),y) | 56 | ifeq ($(CONFIG_TRACING),y) |
57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | 57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o |
58 | endif | 58 | endif |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8cf959bad45..dc53ecb80589 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1313 | 1313 | ||
1314 | __this_cpu_inc(user_stack_count); | 1314 | __this_cpu_inc(user_stack_count); |
1315 | 1315 | ||
1316 | |||
1317 | |||
1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1316 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1319 | sizeof(*entry), flags, pc); | 1317 | sizeof(*entry), flags, pc); |
1320 | if (!event) | 1318 | if (!event) |
1321 | return; | 1319 | goto out_drop_count; |
1322 | entry = ring_buffer_event_data(event); | 1320 | entry = ring_buffer_event_data(event); |
1323 | 1321 | ||
1324 | entry->tgid = current->tgid; | 1322 | entry->tgid = current->tgid; |
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1333 | if (!filter_check_discard(call, entry, buffer, event)) | 1331 | if (!filter_check_discard(call, entry, buffer, event)) |
1334 | ring_buffer_unlock_commit(buffer, event); | 1332 | ring_buffer_unlock_commit(buffer, event); |
1335 | 1333 | ||
1334 | out_drop_count: | ||
1336 | __this_cpu_dec(user_stack_count); | 1335 | __this_cpu_dec(user_stack_count); |
1337 | |||
1338 | out: | 1336 | out: |
1339 | preempt_enable(); | 1337 | preempt_enable(); |
1340 | } | 1338 | } |
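
The trace.c hunk fixes a classic cleanup-path leak: user_stack_count is a per-cpu recursion guard, and the old early return on a failed buffer reservation skipped the matching decrement, leaving the guard permanently raised and silently disabling userstack tracing on that CPU. Routing the failure through the out_drop_count label keeps the increment and decrement balanced. The shape of the pattern in isolation (hypothetical names, self-contained):

	#include <stdbool.h>

	static int recursion_count;

	static bool record_event(bool reservation_ok)
	{
		bool committed = false;

		recursion_count++;
		if (!reservation_ok)
			goto out_drop_count;	/* an early 'return' here would leak */

		/* ... fill in and commit the event ... */
		committed = true;

	out_drop_count:
		recursion_count--;
		return committed;
	}
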
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b3209..562c56e048fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) | |||
558 | static int trace_wakeup_test_thread(void *data) | 558 | static int trace_wakeup_test_thread(void *data) |
559 | { | 559 | { |
560 | /* Make this a RT thread, doesn't need to be too high */ | 560 | /* Make this a RT thread, doesn't need to be too high */ |
561 | struct sched_param param = { .sched_priority = 5 }; | 561 | static struct sched_param param = { .sched_priority = 5 }; |
562 | struct completion *x = data; | 562 | struct completion *x = data; |
563 | 563 | ||
564 | sched_setscheduler(current, SCHED_FIFO, &param); | 564 | sched_setscheduler(current, SCHED_FIFO, &param); |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index aaa8dae08236..6e7b575ac33c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -309,7 +309,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
309 | */ | 309 | */ |
310 | static int watchdog(void *unused) | 310 | static int watchdog(void *unused) |
311 | { | 311 | { |
312 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 312 | static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
313 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 313 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
314 | 314 | ||
315 | sched_setscheduler(current, SCHED_FIFO, &param); | 315 | sched_setscheduler(current, SCHED_FIFO, &param); |
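
The sched_param conversions in trace_selftest.c and watchdog.c follow the same recipe: sched_setscheduler() only reads the struct (it copies the values), so a single static, constant-valued instance is safe to share across invocations, and it keeps the struct out of each kthread's stack frame instead of rebuilding it on every entry. That rationale is my inference from the change itself; the pattern looks like:

	static int my_rt_thread(void *data)	/* hypothetical thread function */
	{
		/* One shared, constant instance instead of a fresh on-stack copy */
		static struct sched_param param = { .sched_priority = 5 };

		sched_setscheduler(current, SCHED_FIFO, &param);
		/* ... thread body ... */
		return 0;
	}
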
diff --git a/lib/Makefile b/lib/Makefile index e6a3763b8212..9e2db72d128e 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) | |||
8 | endif | 8 | endif |
9 | 9 | ||
10 | lib-y := ctype.o string.o vsprintf.o cmdline.o \ | 10 | lib-y := ctype.o string.o vsprintf.o cmdline.o \ |
11 | rbtree.o radix-tree.o dump_stack.o \ | 11 | rbtree.o radix-tree.o dump_stack.o timerqueue.o \ |
12 | idr.o int_sqrt.o extable.o prio_tree.o \ | 12 | idr.o int_sqrt.o extable.o prio_tree.o \ |
13 | sha1.o irq_regs.o reciprocal_div.o argv_split.o \ | 13 | sha1.o irq_regs.o reciprocal_div.o argv_split.o \ |
14 | proportions.o prio_heap.o ratelimit.o show_mem.o \ | 14 | proportions.o prio_heap.o ratelimit.o show_mem.o \ |
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 3094318bfea7..b335acb43be2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c | |||
@@ -141,11 +141,10 @@ static void ddebug_change(const struct ddebug_query *query, | |||
141 | else if (!dp->flags) | 141 | else if (!dp->flags) |
142 | dt->num_enabled++; | 142 | dt->num_enabled++; |
143 | dp->flags = newflags; | 143 | dp->flags = newflags; |
144 | if (newflags) { | 144 | if (newflags) |
145 | jump_label_enable(&dp->enabled); | 145 | dp->enabled = 1; |
146 | } else { | 146 | else |
147 | jump_label_disable(&dp->enabled); | 147 | dp->enabled = 0; |
148 | } | ||
149 | if (verbose) | 148 | if (verbose) |
150 | printk(KERN_INFO | 149 | printk(KERN_INFO |
151 | "ddebug: changed %s:%d [%s]%s %s\n", | 150 | "ddebug: changed %s:%d [%s]%s %s\n", |
diff --git a/lib/timerqueue.c b/lib/timerqueue.c new file mode 100644 index 000000000000..e3a1050e6820 --- /dev/null +++ b/lib/timerqueue.c | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * Generic Timer-queue | ||
3 | * | ||
4 | * Manages a simple queue of timers, ordered by expiration time. | ||
5 | * Uses rbtrees for quick list adds and expiration. | ||
6 | * | ||
7 | * NOTE: All of the following functions need to be serialized | ||
8 | * to avoid races. No locking is done by this library code. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | #include <linux/timerqueue.h> | ||
26 | #include <linux/rbtree.h> | ||
27 | #include <linux/module.h> | ||
28 | |||
29 | /** | ||
30 | * timerqueue_add - Adds timer to timerqueue. | ||
31 | * | ||
32 | * @head: head of timerqueue | ||
33 | * @node: timer node to be added | ||
34 | * | ||
35 | * Adds the timer node to the timerqueue, sorted by the | ||
36 | * node's expires value. | ||
37 | */ | ||
38 | void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | ||
39 | { | ||
40 | struct rb_node **p = &head->head.rb_node; | ||
41 | struct rb_node *parent = NULL; | ||
42 | struct timerqueue_node *ptr; | ||
43 | |||
44 | /* Make sure we don't add nodes that are already added */ | ||
45 | WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); | ||
46 | |||
47 | while (*p) { | ||
48 | parent = *p; | ||
49 | ptr = rb_entry(parent, struct timerqueue_node, node); | ||
50 | if (node->expires.tv64 < ptr->expires.tv64) | ||
51 | p = &(*p)->rb_left; | ||
52 | else | ||
53 | p = &(*p)->rb_right; | ||
54 | } | ||
55 | rb_link_node(&node->node, parent, p); | ||
56 | rb_insert_color(&node->node, &head->head); | ||
57 | |||
58 | if (!head->next || node->expires.tv64 < head->next->expires.tv64) | ||
59 | head->next = node; | ||
60 | } | ||
61 | EXPORT_SYMBOL_GPL(timerqueue_add); | ||
62 | |||
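
The head->next update closing timerqueue_add() is the point of the structure: the earliest-expiring node stays cached, so the common "what fires next?" query needs no rb_first() walk. The companion accessor lives in include/linux/timerqueue.h, which this diff does not include; its assumed shape is just:

	/* Assumed O(1) accessor from include/linux/timerqueue.h (not shown) */
	static inline
	struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
	{
		return head->next;
	}
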
63 | /** | ||
64 | * timerqueue_del - Removes a timer from the timerqueue. | ||
65 | * | ||
66 | * @head: head of timerqueue | ||
67 | * @node: timer node to be removed | ||
68 | * | ||
69 | * Removes the timer node from the timerqueue. | ||
70 | */ | ||
71 | void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) | ||
72 | { | ||
73 | WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); | ||
74 | |||
75 | /* update next pointer */ | ||
76 | if (head->next == node) { | ||
77 | struct rb_node *rbn = rb_next(&node->node); | ||
78 | |||
79 | head->next = rbn ? | ||
80 | rb_entry(rbn, struct timerqueue_node, node) : NULL; | ||
81 | } | ||
82 | rb_erase(&node->node, &head->head); | ||
83 | RB_CLEAR_NODE(&node->node); | ||
84 | } | ||
85 | EXPORT_SYMBOL_GPL(timerqueue_del); | ||
86 | |||
87 | /** | ||
88 | * timerqueue_iterate_next - Returns the timer after the provided timer | ||
89 | * | ||
90 | * @node: Pointer to a timer. | ||
91 | * | ||
92 | * Provides the timer that is after the given node. This is used, when | ||
93 | * necessary, to iterate through the queue of timers | ||
94 | * without modifying it. | ||
95 | */ | ||
96 | struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node) | ||
97 | { | ||
98 | struct rb_node *next; | ||
99 | |||
100 | if (!node) | ||
101 | return NULL; | ||
102 | next = rb_next(&node->node); | ||
103 | if (!next) | ||
104 | return NULL; | ||
105 | return container_of(next, struct timerqueue_node, node); | ||
106 | } | ||
107 | EXPORT_SYMBOL_GPL(timerqueue_iterate_next); | ||
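
Taken together, the three exported functions plus the cached next pointer form a minimal ordered timer queue. A usage sketch (kernel-style; it assumes the timerqueue_init()/timerqueue_init_head() inline helpers and the timerqueue_getnext() accessor from the header, which belong to this patch but are not shown here, and it leaves all serialization to the caller, as the file's NOTE requires):

	#include <linux/timerqueue.h>
	#include <linux/ktime.h>

	static struct timerqueue_head queue;
	static struct timerqueue_node a, b;

	static void timerqueue_demo(void)
	{
		struct timerqueue_node *n;

		timerqueue_init_head(&queue);	/* empty rbtree, next = NULL */
		timerqueue_init(&a);
		timerqueue_init(&b);

		a.expires = ktime_set(1, 0);		/* fires at 1 s */
		b.expires = ktime_set(0, 500000);	/* fires at 0.5 ms, earlier */

		timerqueue_add(&queue, &a);
		timerqueue_add(&queue, &b);	/* b becomes queue.next */

		/* Walk in expiry order without modifying the queue */
		for (n = timerqueue_getnext(&queue); n;
		     n = timerqueue_iterate_next(n))
			;	/* inspect n->expires here */

		timerqueue_del(&queue, &b);	/* next re-derived via rb_next() */
		timerqueue_del(&queue, &a);
	}
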
diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 39580a5dc5df..9f85012acf0d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc | |||
@@ -155,6 +155,8 @@ use strict; | |||
155 | # '@parameter' - name of a parameter | 155 | # '@parameter' - name of a parameter |
156 | # '%CONST' - name of a constant. | 156 | # '%CONST' - name of a constant. |
157 | 157 | ||
158 | ## init lots of data | ||
159 | |||
158 | my $errors = 0; | 160 | my $errors = 0; |
159 | my $warnings = 0; | 161 | my $warnings = 0; |
160 | my $anon_struct_union = 0; | 162 | my $anon_struct_union = 0; |
@@ -218,21 +220,14 @@ my %highlights_list = ( $type_constant, "\$1", | |||
218 | $type_param, "\$1" ); | 220 | $type_param, "\$1" ); |
219 | my $blankline_list = ""; | 221 | my $blankline_list = ""; |
220 | 222 | ||
221 | sub usage { | ||
222 | print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n"; | ||
223 | print " [ -no-doc-sections ]\n"; | ||
224 | print " [ -function funcname [ -function funcname ...] ]\n"; | ||
225 | print " [ -nofunction funcname [ -nofunction funcname ...] ]\n"; | ||
226 | print " c source file(s) > outputfile\n"; | ||
227 | print " -v : verbose output, more warnings & other info listed\n"; | ||
228 | exit 1; | ||
229 | } | ||
230 | |||
231 | # read arguments | 223 | # read arguments |
232 | if ($#ARGV == -1) { | 224 | if ($#ARGV == -1) { |
233 | usage(); | 225 | usage(); |
234 | } | 226 | } |
235 | 227 | ||
228 | my $kernelversion; | ||
229 | my $dohighlight = ""; | ||
230 | |||
236 | my $verbose = 0; | 231 | my $verbose = 0; |
237 | my $output_mode = "man"; | 232 | my $output_mode = "man"; |
238 | my $no_doc_sections = 0; | 233 | my $no_doc_sections = 0; |
@@ -245,7 +240,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', | |||
245 | 'November', 'December')[(localtime)[4]] . | 240 | 'November', 'December')[(localtime)[4]] . |
246 | " " . ((localtime)[5]+1900); | 241 | " " . ((localtime)[5]+1900); |
247 | 242 | ||
248 | # Essentially these are globals | 243 | # Essentially these are globals. |
249 | # They probably want to be tidied up, made more localised or something. | 244 | # They probably want to be tidied up, made more localised or something. |
250 | # CAVEAT EMPTOR! Some of the others I localised may not want to be, which | 245 | # CAVEAT EMPTOR! Some of the others I localised may not want to be, which |
251 | # could cause "use of undefined value" or other bugs. | 246 | # could cause "use of undefined value" or other bugs. |
@@ -353,6 +348,18 @@ while ($ARGV[0] =~ m/^-(.*)/) { | |||
353 | } | 348 | } |
354 | } | 349 | } |
355 | 350 | ||
351 | # continue execution near EOF; | ||
352 | |||
353 | sub usage { | ||
354 | print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n"; | ||
355 | print " [ -no-doc-sections ]\n"; | ||
356 | print " [ -function funcname [ -function funcname ...] ]\n"; | ||
357 | print " [ -nofunction funcname [ -nofunction funcname ...] ]\n"; | ||
358 | print " c source file(s) > outputfile\n"; | ||
359 | print " -v : verbose output, more warnings & other info listed\n"; | ||
360 | exit 1; | ||
361 | } | ||
362 | |||
356 | # get kernel version from env | 363 | # get kernel version from env |
357 | sub get_kernel_version() { | 364 | sub get_kernel_version() { |
358 | my $version = 'unknown kernel version'; | 365 | my $version = 'unknown kernel version'; |
@@ -362,15 +369,6 @@ sub get_kernel_version() { | |||
362 | } | 369 | } |
363 | return $version; | 370 | return $version; |
364 | } | 371 | } |
365 | my $kernelversion = get_kernel_version(); | ||
366 | |||
367 | # generate a sequence of code that will splice in highlighting information | ||
368 | # using the s// operator. | ||
369 | my $dohighlight = ""; | ||
370 | foreach my $pattern (keys %highlights) { | ||
371 | # print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n"; | ||
372 | $dohighlight .= "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n"; | ||
373 | } | ||
374 | 372 | ||
375 | ## | 373 | ## |
376 | # dumps section contents to arrays/hashes intended for that purpose. | 374 | # dumps section contents to arrays/hashes intended for that purpose. |
@@ -1851,34 +1849,6 @@ sub dump_function($$) { | |||
1851 | }); | 1849 | }); |
1852 | } | 1850 | } |
1853 | 1851 | ||
1854 | sub process_file($); | ||
1855 | |||
1856 | # Read the file that maps relative names to absolute names for | ||
1857 | # separate source and object directories and for shadow trees. | ||
1858 | if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { | ||
1859 | my ($relname, $absname); | ||
1860 | while(<SOURCE_MAP>) { | ||
1861 | chop(); | ||
1862 | ($relname, $absname) = (split())[0..1]; | ||
1863 | $relname =~ s:^/+::; | ||
1864 | $source_map{$relname} = $absname; | ||
1865 | } | ||
1866 | close(SOURCE_MAP); | ||
1867 | } | ||
1868 | |||
1869 | foreach (@ARGV) { | ||
1870 | chomp; | ||
1871 | process_file($_); | ||
1872 | } | ||
1873 | if ($verbose && $errors) { | ||
1874 | print STDERR "$errors errors\n"; | ||
1875 | } | ||
1876 | if ($verbose && $warnings) { | ||
1877 | print STDERR "$warnings warnings\n"; | ||
1878 | } | ||
1879 | |||
1880 | exit($errors); | ||
1881 | |||
1882 | sub reset_state { | 1852 | sub reset_state { |
1883 | $function = ""; | 1853 | $function = ""; |
1884 | %constants = (); | 1854 | %constants = (); |
@@ -2285,3 +2255,39 @@ sub process_file($) { | |||
2285 | } | 2255 | } |
2286 | } | 2256 | } |
2287 | } | 2257 | } |
2258 | |||
2259 | |||
2260 | $kernelversion = get_kernel_version(); | ||
2261 | |||
2262 | # generate a sequence of code that will splice in highlighting information | ||
2263 | # using the s// operator. | ||
2264 | foreach my $pattern (keys %highlights) { | ||
2265 | # print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n"; | ||
2266 | $dohighlight .= "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n"; | ||
2267 | } | ||
2268 | |||
2269 | # Read the file that maps relative names to absolute names for | ||
2270 | # separate source and object directories and for shadow trees. | ||
2271 | if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { | ||
2272 | my ($relname, $absname); | ||
2273 | while(<SOURCE_MAP>) { | ||
2274 | chop(); | ||
2275 | ($relname, $absname) = (split())[0..1]; | ||
2276 | $relname =~ s:^/+::; | ||
2277 | $source_map{$relname} = $absname; | ||
2278 | } | ||
2279 | close(SOURCE_MAP); | ||
2280 | } | ||
2281 | |||
2282 | foreach (@ARGV) { | ||
2283 | chomp; | ||
2284 | process_file($_); | ||
2285 | } | ||
2286 | if ($verbose && $errors) { | ||
2287 | print STDERR "$errors errors\n"; | ||
2288 | } | ||
2289 | if ($verbose && $warnings) { | ||
2290 | print STDERR "$warnings warnings\n"; | ||
2291 | } | ||
2292 | |||
2293 | exit($errors); | ||