author     Linus Torvalds <torvalds@linux-foundation.org>    2015-06-22 18:52:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2015-06-22 18:52:04 -0400
commit     23b7776290b10297fe2cae0fb5f166a4f2c68121
tree       73d1e76644a20bc7bff80fbfdb08e8b9a9f28420
parent     6bc4c3ad3619e1bcb4a6330e030007ace8ca465e
parent     6fab54101923044712baee429ff573f03b99fc47
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes are:
- lockless wakeup support for futexes and IPC message queues
(Davidlohr Bueso, Peter Zijlstra)
- Replace spinlocks with atomics in thread_group_cputimer(), to
improve scalability (Jason Low)
- NUMA balancing improvements (Rik van Riel)
- SCHED_DEADLINE improvements (Wanpeng Li)
- clean up and reorganize preemption helpers (Frederic Weisbecker)
- decouple page fault disabling machinery from the preemption
counter, to improve debuggability and robustness (David
Hildenbrand)
- SCHED_DEADLINE documentation updates (Luca Abeni)
- topology CPU masks cleanups (Bartosz Golaszewski)
- /proc/sched_debug improvements (Srikar Dronamraju)"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (79 commits)
sched/deadline: Remove needless parameter in dl_runtime_exceeded()
sched: Remove superfluous resetting of the p->dl_throttled flag
sched/deadline: Drop duplicate init_sched_dl_class() declaration
sched/deadline: Reduce rq lock contention by eliminating locking of non-feasible target
sched/deadline: Make init_sched_dl_class() __init
sched/deadline: Optimize pull_dl_task()
sched/preempt: Add static_key() to preempt_notifiers
sched/preempt: Fix preempt notifiers documentation about hlist_del() within unsafe iteration
sched/stop_machine: Fix deadlock between multiple stop_two_cpus()
sched/debug: Add sum_sleep_runtime to /proc/<pid>/sched
sched/debug: Replace vruntime with wait_sum in /proc/sched_debug
sched/debug: Properly format runnable tasks in /proc/sched_debug
sched/numa: Only consider less busy nodes as numa balancing destinations
Revert 095bebf61a46 ("sched/numa: Do not move past the balance point if unbalanced")
sched/fair: Prevent throttling in early pick_next_task_fair()
preempt: Reorganize the notrace definitions a bit
preempt: Use preempt_schedule_context() as the official tracing preemption point
sched: Make preempt_schedule_context() function-tracing safe
x86: Remove cpu_sibling_mask() and cpu_core_mask()
x86: Replace cpu_**_mask() with topology_**_cpumask()
...
138 files changed, 1442 insertions, 972 deletions
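The fault-handler and futex changes below all follow from the "decouple page fault disabling machinery from the preemption counter" item in the summary above: pagefault_disable() stops piggybacking on the preempt count and instead tracks a per-task counter, and the fault handlers switch from in_atomic() to faulthandler_disabled(). A rough, simplified sketch of the resulting helpers (an approximation of include/linux/uaccess.h after this merge, not a verbatim copy; the names match the diff, the bodies are paraphrased):

    /* Simplified sketch (not verbatim kernel code) of the decoupled
     * pagefault-disable machinery.  Kernel context is assumed: current,
     * barrier() and in_atomic() come from sched.h/compiler.h/preempt.h. */

    static inline void pagefault_disable(void)
    {
        current->pagefault_disabled++;  /* per-task counter, new in this series */
        barrier();                      /* order the store before any fault */
    }

    static inline void pagefault_enable(void)
    {
        barrier();
        current->pagefault_disabled--;
    }

    /* true while the current task must not handle (sleep in) page faults */
    #define pagefault_disabled()     (current->pagefault_disabled != 0)

    /* what the converted fault handlers below test instead of in_atomic() */
    #define faulthandler_disabled()  (pagefault_disabled() || in_atomic())

Because pagefault_disable() no longer implies preempt_disable(), code that also relied on the preemption side effect (kmap_atomic(), the uniprocessor ARM futex path) now disables preemption explicitly, as the later hunks show.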
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 0aad6deb2d96..12b1b25b4da9 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -1,6 +1,6 @@ | |||
1 | 1 | ||
2 | Export CPU topology info via sysfs. Items (attributes) are similar | 2 | Export CPU topology info via sysfs. Items (attributes) are similar |
3 | to /proc/cpuinfo. | 3 | to /proc/cpuinfo output of some architectures: |
4 | 4 | ||
5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: | 5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: |
6 | 6 | ||
@@ -23,20 +23,35 @@ to /proc/cpuinfo. | |||
23 | 4) /sys/devices/system/cpu/cpuX/topology/thread_siblings: | 23 | 4) /sys/devices/system/cpu/cpuX/topology/thread_siblings: |
24 | 24 | ||
25 | internal kernel map of cpuX's hardware threads within the same | 25 | internal kernel map of cpuX's hardware threads within the same |
26 | core as cpuX | 26 | core as cpuX. |
27 | 27 | ||
28 | 5) /sys/devices/system/cpu/cpuX/topology/core_siblings: | 28 | 5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list: |
29 | |||
30 | human-readable list of cpuX's hardware threads within the same | ||
31 | core as cpuX. | ||
32 | |||
33 | 6) /sys/devices/system/cpu/cpuX/topology/core_siblings: | ||
29 | 34 | ||
30 | internal kernel map of cpuX's hardware threads within the same | 35 | internal kernel map of cpuX's hardware threads within the same |
31 | physical_package_id. | 36 | physical_package_id. |
32 | 37 | ||
33 | 6) /sys/devices/system/cpu/cpuX/topology/book_siblings: | 38 | 7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list: |
39 | |||
40 | human-readable list of cpuX's hardware threads within the same | ||
41 | physical_package_id. | ||
42 | |||
43 | 8) /sys/devices/system/cpu/cpuX/topology/book_siblings: | ||
34 | 44 | ||
35 | internal kernel map of cpuX's hardware threads within the same | 45 | internal kernel map of cpuX's hardware threads within the same |
36 | book_id. | 46 | book_id. |
37 | 47 | ||
48 | 9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list: | ||
49 | |||
50 | human-readable list of cpuX's hardware threads within the same | ||
51 | book_id. | ||
52 | |||
38 | To implement it in an architecture-neutral way, a new source file, | 53 | To implement it in an architecture-neutral way, a new source file, |
39 | drivers/base/topology.c, is to export the 4 or 6 attributes. The two book | 54 | drivers/base/topology.c, is to export the 6 or 9 attributes. The three book |
40 | related sysfs files will only be created if CONFIG_SCHED_BOOK is selected. | 55 | related sysfs files will only be created if CONFIG_SCHED_BOOK is selected. |
41 | 56 | ||
42 | For an architecture to support this feature, it must define some of | 57 | For an architecture to support this feature, it must define some of |
@@ -44,20 +59,22 @@ these macros in include/asm-XXX/topology.h: | |||
44 | #define topology_physical_package_id(cpu) | 59 | #define topology_physical_package_id(cpu) |
45 | #define topology_core_id(cpu) | 60 | #define topology_core_id(cpu) |
46 | #define topology_book_id(cpu) | 61 | #define topology_book_id(cpu) |
47 | #define topology_thread_cpumask(cpu) | 62 | #define topology_sibling_cpumask(cpu) |
48 | #define topology_core_cpumask(cpu) | 63 | #define topology_core_cpumask(cpu) |
49 | #define topology_book_cpumask(cpu) | 64 | #define topology_book_cpumask(cpu) |
50 | 65 | ||
51 | The type of **_id is int. | 66 | The type of **_id macros is int. |
52 | The type of siblings is (const) struct cpumask *. | 67 | The type of **_cpumask macros is (const) struct cpumask *. The latter |
68 | correspond with appropriate **_siblings sysfs attributes (except for | ||
69 | topology_sibling_cpumask() which corresponds with thread_siblings). | ||
53 | 70 | ||
54 | To be consistent on all architectures, include/linux/topology.h | 71 | To be consistent on all architectures, include/linux/topology.h |
55 | provides default definitions for any of the above macros that are | 72 | provides default definitions for any of the above macros that are |
56 | not defined by include/asm-XXX/topology.h: | 73 | not defined by include/asm-XXX/topology.h: |
57 | 1) physical_package_id: -1 | 74 | 1) physical_package_id: -1 |
58 | 2) core_id: 0 | 75 | 2) core_id: 0 |
59 | 3) thread_siblings: just the given CPU | 76 | 3) sibling_cpumask: just the given CPU |
60 | 4) core_siblings: just the given CPU | 77 | 4) core_cpumask: just the given CPU |
61 | 78 | ||
62 | For architectures that don't support books (CONFIG_SCHED_BOOK) there are no | 79 | For architectures that don't support books (CONFIG_SCHED_BOOK) there are no |
63 | default definitions for topology_book_id() and topology_book_cpumask(). | 80 | default definitions for topology_book_id() and topology_book_cpumask(). |
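The companion topology_thread_cpumask() to topology_sibling_cpumask() rename shows up in the per-architecture headers later in this diff. For illustration only (a hypothetical helper, not part of the patch), the renamed accessors are typically consumed like this:

    #include <linux/cpumask.h>
    #include <linux/topology.h>
    #include <linux/printk.h>

    /* Hypothetical helper: report each online CPU's hardware-thread and
     * core siblings via the renamed topology accessors. */
    static void dump_cpu_siblings(void)
    {
        unsigned int cpu, sib;

        for_each_online_cpu(cpu) {
            pr_info("cpu%u: %u thread sibling(s), %u core sibling(s)\n",
                    cpu,
                    cpumask_weight(topology_sibling_cpumask(cpu)),
                    cpumask_weight(topology_core_cpumask(cpu)));

            for_each_cpu(sib, topology_sibling_cpumask(cpu))
                pr_info("  cpu%u shares a core with cpu%u\n", cpu, sib);
        }
    }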
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index 21461a0441c1..e114513a2731 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -8,6 +8,10 @@ CONTENTS | |||
8 | 1. Overview | 8 | 1. Overview |
9 | 2. Scheduling algorithm | 9 | 2. Scheduling algorithm |
10 | 3. Scheduling Real-Time Tasks | 10 | 3. Scheduling Real-Time Tasks |
11 | 3.1 Definitions | ||
12 | 3.2 Schedulability Analysis for Uniprocessor Systems | ||
13 | 3.3 Schedulability Analysis for Multiprocessor Systems | ||
14 | 3.4 Relationship with SCHED_DEADLINE Parameters | ||
11 | 4. Bandwidth management | 15 | 4. Bandwidth management |
12 | 4.1 System-wide settings | 16 | 4.1 System-wide settings |
13 | 4.2 Task interface | 17 | 4.2 Task interface |
@@ -43,7 +47,7 @@ CONTENTS | |||
43 | "deadline", to schedule tasks. A SCHED_DEADLINE task should receive | 47 | "deadline", to schedule tasks. A SCHED_DEADLINE task should receive |
44 | "runtime" microseconds of execution time every "period" microseconds, and | 48 | "runtime" microseconds of execution time every "period" microseconds, and |
45 | these "runtime" microseconds are available within "deadline" microseconds | 49 | these "runtime" microseconds are available within "deadline" microseconds |
46 | from the beginning of the period. In order to implement this behaviour, | 50 | from the beginning of the period. In order to implement this behavior, |
47 | every time the task wakes up, the scheduler computes a "scheduling deadline" | 51 | every time the task wakes up, the scheduler computes a "scheduling deadline" |
48 | consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then | 52 | consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then |
49 | scheduled using EDF[1] on these scheduling deadlines (the task with the | 53 | scheduled using EDF[1] on these scheduling deadlines (the task with the |
@@ -52,7 +56,7 @@ CONTENTS | |||
52 | "admission control" strategy (see Section "4. Bandwidth management") is used | 56 | "admission control" strategy (see Section "4. Bandwidth management") is used |
53 | (clearly, if the system is overloaded this guarantee cannot be respected). | 57 | (clearly, if the system is overloaded this guarantee cannot be respected). |
54 | 58 | ||
55 | Summing up, the CBS[2,3] algorithms assigns scheduling deadlines to tasks so | 59 | Summing up, the CBS[2,3] algorithm assigns scheduling deadlines to tasks so |
56 | that each task runs for at most its runtime every period, avoiding any | 60 | that each task runs for at most its runtime every period, avoiding any |
57 | interference between different tasks (bandwidth isolation), while the EDF[1] | 61 | interference between different tasks (bandwidth isolation), while the EDF[1] |
58 | algorithm selects the task with the earliest scheduling deadline as the one | 62 | algorithm selects the task with the earliest scheduling deadline as the one |
@@ -63,7 +67,7 @@ CONTENTS | |||
63 | In more details, the CBS algorithm assigns scheduling deadlines to | 67 | In more details, the CBS algorithm assigns scheduling deadlines to |
64 | tasks in the following way: | 68 | tasks in the following way: |
65 | 69 | ||
66 | - Each SCHED_DEADLINE task is characterised by the "runtime", | 70 | - Each SCHED_DEADLINE task is characterized by the "runtime", |
67 | "deadline", and "period" parameters; | 71 | "deadline", and "period" parameters; |
68 | 72 | ||
69 | - The state of the task is described by a "scheduling deadline", and | 73 | - The state of the task is described by a "scheduling deadline", and |
@@ -78,7 +82,7 @@ CONTENTS | |||
78 | 82 | ||
79 | then, if the scheduling deadline is smaller than the current time, or | 83 | then, if the scheduling deadline is smaller than the current time, or |
80 | this condition is verified, the scheduling deadline and the | 84 | this condition is verified, the scheduling deadline and the |
81 | remaining runtime are re-initialised as | 85 | remaining runtime are re-initialized as |
82 | 86 | ||
83 | scheduling deadline = current time + deadline | 87 | scheduling deadline = current time + deadline |
84 | remaining runtime = runtime | 88 | remaining runtime = runtime |
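The re-initialization shown in this hunk is the CBS "replenishment" step. The condition referred to as "this condition" is stated in the documentation just above the quoted lines and is not part of this excerpt; the sketch below uses the textbook CBS bandwidth check in its place, which is an assumption:

    /* Rough pseudo-C of the CBS wakeup rule described above (not kernel code).
     * All times are in the same unit; runtime/deadline/period are the task's
     * SCHED_DEADLINE parameters.  The bandwidth check is the classic CBS
     * formulation, used here as a stand-in for the exact documented test. */
    static void cbs_wakeup(u64 now, u64 *sched_deadline, u64 *remaining_runtime,
                           u64 runtime, u64 deadline, u64 period)
    {
        if (*sched_deadline <= now ||
            /* remaining bandwidth would exceed the reserved runtime/period */
            *remaining_runtime * period > runtime * (*sched_deadline - now)) {
                *sched_deadline    = now + deadline;
                *remaining_runtime = runtime;
        }
        /* otherwise the old scheduling deadline and runtime are kept */
    }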
@@ -126,31 +130,37 @@ CONTENTS | |||
126 | suited for periodic or sporadic real-time tasks that need guarantees on their | 130 | suited for periodic or sporadic real-time tasks that need guarantees on their |
127 | timing behavior, e.g., multimedia, streaming, control applications, etc. | 131 | timing behavior, e.g., multimedia, streaming, control applications, etc. |
128 | 132 | ||
133 | 3.1 Definitions | ||
134 | ------------------------ | ||
135 | |||
129 | A typical real-time task is composed of a repetition of computation phases | 136 | A typical real-time task is composed of a repetition of computation phases |
130 | (task instances, or jobs) which are activated on a periodic or sporadic | 137 | (task instances, or jobs) which are activated on a periodic or sporadic |
131 | fashion. | 138 | fashion. |
132 | Each job J_j (where J_j is the j^th job of the task) is characterised by an | 139 | Each job J_j (where J_j is the j^th job of the task) is characterized by an |
133 | arrival time r_j (the time when the job starts), an amount of computation | 140 | arrival time r_j (the time when the job starts), an amount of computation |
134 | time c_j needed to finish the job, and a job absolute deadline d_j, which | 141 | time c_j needed to finish the job, and a job absolute deadline d_j, which |
135 | is the time within which the job should be finished. The maximum execution | 142 | is the time within which the job should be finished. The maximum execution |
136 | time max_j{c_j} is called "Worst Case Execution Time" (WCET) for the task. | 143 | time max{c_j} is called "Worst Case Execution Time" (WCET) for the task. |
137 | A real-time task can be periodic with period P if r_{j+1} = r_j + P, or | 144 | A real-time task can be periodic with period P if r_{j+1} = r_j + P, or |
138 | sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally, | 145 | sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally, |
139 | d_j = r_j + D, where D is the task's relative deadline. | 146 | d_j = r_j + D, where D is the task's relative deadline. |
140 | The utilisation of a real-time task is defined as the ratio between its | 147 | Summing up, a real-time task can be described as |
148 | Task = (WCET, D, P) | ||
149 | |||
150 | The utilization of a real-time task is defined as the ratio between its | ||
141 | WCET and its period (or minimum inter-arrival time), and represents | 151 | WCET and its period (or minimum inter-arrival time), and represents |
142 | the fraction of CPU time needed to execute the task. | 152 | the fraction of CPU time needed to execute the task. |
143 | 153 | ||
144 | If the total utilisation sum_i(WCET_i/P_i) is larger than M (with M equal | 154 | If the total utilization U=sum(WCET_i/P_i) is larger than M (with M equal |
145 | to the number of CPUs), then the scheduler is unable to respect all the | 155 | to the number of CPUs), then the scheduler is unable to respect all the |
146 | deadlines. | 156 | deadlines. |
147 | Note that total utilisation is defined as the sum of the utilisations | 157 | Note that total utilization is defined as the sum of the utilizations |
148 | WCET_i/P_i over all the real-time tasks in the system. When considering | 158 | WCET_i/P_i over all the real-time tasks in the system. When considering |
149 | multiple real-time tasks, the parameters of the i-th task are indicated | 159 | multiple real-time tasks, the parameters of the i-th task are indicated |
150 | with the "_i" suffix. | 160 | with the "_i" suffix. |
151 | Moreover, if the total utilisation is larger than M, then we risk starving | 161 | Moreover, if the total utilization is larger than M, then we risk starving |
152 | non- real-time tasks by real-time tasks. | 162 | non- real-time tasks by real-time tasks. |
153 | If, instead, the total utilisation is smaller than M, then non real-time | 163 | If, instead, the total utilization is smaller than M, then non real-time |
154 | tasks will not be starved and the system might be able to respect all the | 164 | tasks will not be starved and the system might be able to respect all the |
155 | deadlines. | 165 | deadlines. |
156 | As a matter of fact, in this case it is possible to provide an upper bound | 166 | As a matter of fact, in this case it is possible to provide an upper bound |
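The utilization bound discussed above (total utilization versus the number of CPUs M) can be illustrated with a few made-up numbers; the small user-space program below is purely illustrative and not part of the patch:

    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative: total utilization U = sum(WCET_i / P_i) compared with M. */
    struct rt_task { double wcet, deadline, period; };  /* (WCET, D, P), in ms */

    int main(void)
    {
        const struct rt_task tasks[] = {    /* hypothetical task set */
            { 10.0,  50.0,  50.0 },
            { 30.0, 100.0, 100.0 },
            { 40.0, 200.0, 200.0 },
        };
        const int m = 2;                    /* number of CPUs */
        double u = 0.0;
        size_t i;

        for (i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++)
            u += tasks[i].wcet / tasks[i].period;

        printf("total utilization U = %.2f on M = %d CPUs\n", u, m);
        if (u > m)
            printf("U > M: some deadlines will necessarily be missed\n");
        else
            printf("U <= M: non real-time tasks are not starved and the\n"
                   "tardiness of real-time tasks has an upper bound\n");
        return 0;
    }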
@@ -159,38 +169,119 @@ CONTENTS | |||
159 | More precisely, it can be proven that using a global EDF scheduler the | 169 | More precisely, it can be proven that using a global EDF scheduler the |
160 | maximum tardiness of each task is smaller or equal than | 170 | maximum tardiness of each task is smaller or equal than |
161 | ((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max | 171 | ((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max |
162 | where WCET_max = max_i{WCET_i} is the maximum WCET, WCET_min=min_i{WCET_i} | 172 | where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i} |
163 | is the minimum WCET, and U_max = max_i{WCET_i/P_i} is the maximum utilisation. | 173 | is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum |
174 | utilization[12]. | ||
175 | |||
176 | 3.2 Schedulability Analysis for Uniprocessor Systems | ||
177 | ------------------------ | ||
164 | 178 | ||
165 | If M=1 (uniprocessor system), or in case of partitioned scheduling (each | 179 | If M=1 (uniprocessor system), or in case of partitioned scheduling (each |
166 | real-time task is statically assigned to one and only one CPU), it is | 180 | real-time task is statically assigned to one and only one CPU), it is |
167 | possible to formally check if all the deadlines are respected. | 181 | possible to formally check if all the deadlines are respected. |
168 | If D_i = P_i for all tasks, then EDF is able to respect all the deadlines | 182 | If D_i = P_i for all tasks, then EDF is able to respect all the deadlines |
169 | of all the tasks executing on a CPU if and only if the total utilisation | 183 | of all the tasks executing on a CPU if and only if the total utilization |
170 | of the tasks running on such a CPU is smaller or equal than 1. | 184 | of the tasks running on such a CPU is smaller or equal than 1. |
171 | If D_i != P_i for some task, then it is possible to define the density of | 185 | If D_i != P_i for some task, then it is possible to define the density of |
172 | a task as C_i/min{D_i,T_i}, and EDF is able to respect all the deadlines | 186 | a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines |
173 | of all the tasks running on a CPU if the sum sum_i C_i/min{D_i,T_i} of the | 187 | of all the tasks running on a CPU if the sum of the densities of the tasks |
174 | densities of the tasks running on such a CPU is smaller or equal than 1 | 188 | running on such a CPU is smaller or equal than 1: |
175 | (notice that this condition is only sufficient, and not necessary). | 189 | sum(WCET_i / min{D_i, P_i}) <= 1 |
190 | It is important to notice that this condition is only sufficient, and not | ||
191 | necessary: there are task sets that are schedulable, but do not respect the | ||
192 | condition. For example, consider the task set {Task_1,Task_2} composed by | ||
193 | Task_1=(50ms,50ms,100ms) and Task_2=(10ms,100ms,100ms). | ||
194 | EDF is clearly able to schedule the two tasks without missing any deadline | ||
195 | (Task_1 is scheduled as soon as it is released, and finishes just in time | ||
196 | to respect its deadline; Task_2 is scheduled immediately after Task_1, hence | ||
197 | its response time cannot be larger than 50ms + 10ms = 60ms) even if | ||
198 | 50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1 | ||
199 | Of course it is possible to test the exact schedulability of tasks with | ||
200 | D_i != P_i (checking a condition that is both sufficient and necessary), | ||
201 | but this cannot be done by comparing the total utilization or density with | ||
202 | a constant. Instead, the so called "processor demand" approach can be used, | ||
203 | computing the total amount of CPU time h(t) needed by all the tasks to | ||
204 | respect all of their deadlines in a time interval of size t, and comparing | ||
205 | such a time with the interval size t. If h(t) is smaller than t (that is, | ||
206 | the amount of time needed by the tasks in a time interval of size t is | ||
207 | smaller than the size of the interval) for all the possible values of t, then | ||
208 | EDF is able to schedule the tasks respecting all of their deadlines. Since | ||
209 | performing this check for all possible values of t is impossible, it has been | ||
210 | proven[4,5,6] that it is sufficient to perform the test for values of t | ||
211 | between 0 and a maximum value L. The cited papers contain all of the | ||
212 | mathematical details and explain how to compute h(t) and L. | ||
213 | In any case, this kind of analysis is too complex as well as too | ||
214 | time-consuming to be performed on-line. Hence, as explained in Section | ||
215 | 4 Linux uses an admission test based on the tasks' utilizations. | ||
216 | |||
217 | 3.3 Schedulability Analysis for Multiprocessor Systems | ||
218 | ------------------------ | ||
176 | 219 | ||
177 | On multiprocessor systems with global EDF scheduling (non partitioned | 220 | On multiprocessor systems with global EDF scheduling (non partitioned |
178 | systems), a sufficient test for schedulability can not be based on the | 221 | systems), a sufficient test for schedulability can not be based on the |
179 | utilisations (it can be shown that task sets with utilisations slightly | 222 | utilizations or densities: it can be shown that even if D_i = P_i task |
180 | larger than 1 can miss deadlines regardless of the number of CPUs M). | 223 | sets with utilizations slightly larger than 1 can miss deadlines regardless |
181 | However, as previously stated, enforcing that the total utilisation is smaller | 224 | of the number of CPUs. |
182 | than M is enough to guarantee that non real-time tasks are not starved and | 225 | |
183 | that the tardiness of real-time tasks has an upper bound. | 226 | Consider a set {Task_1,...Task_{M+1}} of M+1 tasks on a system with M |
227 | CPUs, with the first task Task_1=(P,P,P) having period, relative deadline | ||
228 | and WCET equal to P. The remaining M tasks Task_i=(e,P-1,P-1) have an | ||
229 | arbitrarily small worst case execution time (indicated as "e" here) and a | ||
230 | period smaller than the one of the first task. Hence, if all the tasks | ||
231 | activate at the same time t, global EDF schedules these M tasks first | ||
232 | (because their absolute deadlines are equal to t + P - 1, hence they are | ||
233 | smaller than the absolute deadline of Task_1, which is t + P). As a | ||
234 | result, Task_1 can be scheduled only at time t + e, and will finish at | ||
235 | time t + e + P, after its absolute deadline. The total utilization of the | ||
236 | task set is U = M · e / (P - 1) + P / P = M · e / (P - 1) + 1, and for small | ||
237 | values of e this can become very close to 1. This is known as "Dhall's | ||
238 | effect"[7]. Note: the example in the original paper by Dhall has been | ||
239 | slightly simplified here (for example, Dhall more correctly computed | ||
240 | lim_{e->0}U). | ||
241 | |||
242 | More complex schedulability tests for global EDF have been developed in | ||
243 | real-time literature[8,9], but they are not based on a simple comparison | ||
244 | between total utilization (or density) and a fixed constant. If all tasks | ||
245 | have D_i = P_i, a sufficient schedulability condition can be expressed in | ||
246 | a simple way: | ||
247 | sum(WCET_i / P_i) <= M - (M - 1) · U_max | ||
248 | where U_max = max{WCET_i / P_i}[10]. Notice that for U_max = 1, | ||
249 | M - (M - 1) · U_max becomes M - M + 1 = 1 and this schedulability condition | ||
250 | just confirms the Dhall's effect. A more complete survey of the literature | ||
251 | about schedulability tests for multi-processor real-time scheduling can be | ||
252 | found in [11]. | ||
253 | |||
254 | As seen, enforcing that the total utilization is smaller than M does not | ||
255 | guarantee that global EDF schedules the tasks without missing any deadline | ||
256 | (in other words, global EDF is not an optimal scheduling algorithm). However, | ||
257 | a total utilization smaller than M is enough to guarantee that non real-time | ||
258 | tasks are not starved and that the tardiness of real-time tasks has an upper | ||
259 | bound[12] (as previously noted). Different bounds on the maximum tardiness | ||
260 | experienced by real-time tasks have been developed in various papers[13,14], | ||
261 | but the theoretical result that is important for SCHED_DEADLINE is that if | ||
262 | the total utilization is smaller or equal than M then the response times of | ||
263 | the tasks are limited. | ||
264 | |||
265 | 3.4 Relationship with SCHED_DEADLINE Parameters | ||
266 | ------------------------ | ||
184 | 267 | ||
185 | SCHED_DEADLINE can be used to schedule real-time tasks guaranteeing that | 268 | Finally, it is important to understand the relationship between the |
186 | the jobs' deadlines of a task are respected. In order to do this, a task | 269 | SCHED_DEADLINE scheduling parameters described in Section 2 (runtime, |
187 | must be scheduled by setting: | 270 | deadline and period) and the real-time task parameters (WCET, D, P) |
271 | described in this section. Note that the tasks' temporal constraints are | ||
272 | represented by its absolute deadlines d_j = r_j + D described above, while | ||
273 | SCHED_DEADLINE schedules the tasks according to scheduling deadlines (see | ||
274 | Section 2). | ||
275 | If an admission test is used to guarantee that the scheduling deadlines | ||
276 | are respected, then SCHED_DEADLINE can be used to schedule real-time tasks | ||
277 | guaranteeing that all the jobs' deadlines of a task are respected. | ||
278 | In order to do this, a task must be scheduled by setting: | ||
188 | 279 | ||
189 | - runtime >= WCET | 280 | - runtime >= WCET |
190 | - deadline = D | 281 | - deadline = D |
191 | - period <= P | 282 | - period <= P |
192 | 283 | ||
193 | IOW, if runtime >= WCET and if period is >= P, then the scheduling deadlines | 284 | IOW, if runtime >= WCET and if period is <= P, then the scheduling deadlines |
194 | and the absolute deadlines (d_j) coincide, so a proper admission control | 285 | and the absolute deadlines (d_j) coincide, so a proper admission control |
195 | allows to respect the jobs' absolute deadlines for this task (this is what is | 286 | allows to respect the jobs' absolute deadlines for this task (this is what is |
196 | called "hard schedulability property" and is an extension of Lemma 1 of [2]). | 287 | called "hard schedulability property" and is an extension of Lemma 1 of [2]). |
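The "processor demand" test sketched in section 3.2 above can be applied to the documentation's own example, Task_1=(50ms,50ms,100ms) and Task_2=(10ms,100ms,100ms), whose density sum is 1.1 yet which is schedulable. The program below is illustrative only; it uses a common formulation of the demand bound, h(t) = sum over i of max(0, floor((t - D_i)/P_i) + 1) * WCET_i, which is an assumption here (the cited papers [4,5,6] give the exact test and the bound L on the values of t that need checking):

    #include <stdio.h>

    /* Illustrative processor-demand check for the example above:
     * Task_1 = (WCET 50, D 50, P 100), Task_2 = (WCET 10, D 100, P 100). */
    struct rt_task { long wcet, deadline, period; };

    static long h(const struct rt_task *ts, int n, long t)
    {
        long demand = 0;
        int i;

        for (i = 0; i < n; i++)
            if (t >= ts[i].deadline)    /* jobs with a deadline at or before t */
                demand += ((t - ts[i].deadline) / ts[i].period + 1) * ts[i].wcet;
        return demand;
    }

    int main(void)
    {
        const struct rt_task ts[] = { { 50, 50, 100 }, { 10, 100, 100 } };
        long t;

        for (t = 50; t <= 200; t += 50)     /* a few points in one hyperperiod */
            printf("h(%3ld) = %3ld %s t\n", t, h(ts, 2, t),
                   h(ts, 2, t) <= t ? "<=" : "> ");
        return 0;
    }

Every h(t) stays at or below t, matching the text's claim that EDF schedules this task set even though the density-based sufficient test fails.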
@@ -206,6 +297,39 @@ CONTENTS | |||
206 | Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf | 297 | Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf |
207 | 3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab | 298 | 3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab |
208 | Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf | 299 | Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf |
300 | 4 - J. Y. Leung and M.L. Merril. A Note on Preemptive Scheduling of | ||
301 | Periodic, Real-Time Tasks. Information Processing Letters, vol. 11, | ||
302 | no. 3, pp. 115-118, 1980. | ||
303 | 5 - S. K. Baruah, A. K. Mok and L. E. Rosier. Preemptively Scheduling | ||
304 | Hard-Real-Time Sporadic Tasks on One Processor. Proceedings of the | ||
305 | 11th IEEE Real-time Systems Symposium, 1990. | ||
306 | 6 - S. K. Baruah, L. E. Rosier and R. R. Howell. Algorithms and Complexity | ||
307 | Concerning the Preemptive Scheduling of Periodic Real-Time tasks on | ||
308 | One Processor. Real-Time Systems Journal, vol. 4, no. 2, pp 301-324, | ||
309 | 1990. | ||
310 | 7 - S. J. Dhall and C. L. Liu. On a real-time scheduling problem. Operations | ||
311 | research, vol. 26, no. 1, pp 127-140, 1978. | ||
312 | 8 - T. Baker. Multiprocessor EDF and Deadline Monotonic Schedulability | ||
313 | Analysis. Proceedings of the 24th IEEE Real-Time Systems Symposium, 2003. | ||
314 | 9 - T. Baker. An Analysis of EDF Schedulability on a Multiprocessor. | ||
315 | IEEE Transactions on Parallel and Distributed Systems, vol. 16, no. 8, | ||
316 | pp 760-768, 2005. | ||
317 | 10 - J. Goossens, S. Funk and S. Baruah, Priority-Driven Scheduling of | ||
318 | Periodic Task Systems on Multiprocessors. Real-Time Systems Journal, | ||
319 | vol. 25, no. 2–3, pp. 187–205, 2003. | ||
320 | 11 - R. Davis and A. Burns. A Survey of Hard Real-Time Scheduling for | ||
321 | Multiprocessor Systems. ACM Computing Surveys, vol. 43, no. 4, 2011. | ||
322 | http://www-users.cs.york.ac.uk/~robdavis/papers/MPSurveyv5.0.pdf | ||
323 | 12 - U. C. Devi and J. H. Anderson. Tardiness Bounds under Global EDF | ||
324 | Scheduling on a Multiprocessor. Real-Time Systems Journal, vol. 32, | ||
325 | no. 2, pp 133-189, 2008. | ||
326 | 13 - P. Valente and G. Lipari. An Upper Bound to the Lateness of Soft | ||
327 | Real-Time Tasks Scheduled by EDF on Multiprocessors. Proceedings of | ||
328 | the 26th IEEE Real-Time Systems Symposium, 2005. | ||
329 | 14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for | ||
330 | Global EDF. Proceedings of the 22nd Euromicro Conference on | ||
331 | Real-Time Systems, 2010. | ||
332 | |||
209 | 333 | ||
210 | 4. Bandwidth management | 334 | 4. Bandwidth management |
211 | ======================= | 335 | ======================= |
@@ -218,10 +342,10 @@ CONTENTS | |||
218 | no guarantee can be given on the actual scheduling of the -deadline tasks. | 342 | no guarantee can be given on the actual scheduling of the -deadline tasks. |
219 | 343 | ||
220 | As already stated in Section 3, a necessary condition to be respected to | 344 | As already stated in Section 3, a necessary condition to be respected to |
221 | correctly schedule a set of real-time tasks is that the total utilisation | 345 | correctly schedule a set of real-time tasks is that the total utilization |
222 | is smaller than M. When talking about -deadline tasks, this requires that | 346 | is smaller than M. When talking about -deadline tasks, this requires that |
223 | the sum of the ratio between runtime and period for all tasks is smaller | 347 | the sum of the ratio between runtime and period for all tasks is smaller |
224 | than M. Notice that the ratio runtime/period is equivalent to the utilisation | 348 | than M. Notice that the ratio runtime/period is equivalent to the utilization |
225 | of a "traditional" real-time task, and is also often referred to as | 349 | of a "traditional" real-time task, and is also often referred to as |
226 | "bandwidth". | 350 | "bandwidth". |
227 | The interface used to control the CPU bandwidth that can be allocated | 351 | The interface used to control the CPU bandwidth that can be allocated |
@@ -251,7 +375,7 @@ CONTENTS | |||
251 | The system wide settings are configured under the /proc virtual file system. | 375 | The system wide settings are configured under the /proc virtual file system. |
252 | 376 | ||
253 | For now the -rt knobs are used for -deadline admission control and the | 377 | For now the -rt knobs are used for -deadline admission control and the |
254 | -deadline runtime is accounted against the -rt runtime. We realise that this | 378 | -deadline runtime is accounted against the -rt runtime. We realize that this |
255 | isn't entirely desirable; however, it is better to have a small interface for | 379 | isn't entirely desirable; however, it is better to have a small interface for |
256 | now, and be able to change it easily later. The ideal situation (see 5.) is to | 380 | now, and be able to change it easily later. The ideal situation (see 5.) is to |
257 | run -rt tasks from a -deadline server; in which case the -rt bandwidth is a | 381 | run -rt tasks from a -deadline server; in which case the -rt bandwidth is a |
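Tying section 3.4 to the task interface of section 4.2: a task asks for runtime >= WCET, deadline = D and period <= P through sched_setattr(). The fragment below is a user-space sketch with made-up parameters; struct sched_attr is declared locally because the libc of this era does not ship it, and if SYS_sched_setattr is not provided by the installed headers the architecture's __NR_sched_setattr value is needed instead:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/sched.h>        /* SCHED_DEADLINE */

    /* Layout of struct sched_attr as documented for sched_setattr(). */
    struct sched_attr {
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;        /* SCHED_NORMAL, SCHED_BATCH */
        uint32_t sched_priority;    /* SCHED_FIFO, SCHED_RR */
        uint64_t sched_runtime;     /* SCHED_DEADLINE, in nanoseconds */
        uint64_t sched_deadline;
        uint64_t sched_period;
    };

    int main(void)
    {
        struct sched_attr attr = {
            .size           = sizeof(attr),
            .sched_policy   = SCHED_DEADLINE,
            /* runtime >= WCET, deadline = D, period <= P (section 3.4) */
            .sched_runtime  = 10 * 1000 * 1000,     /* 10 ms  */
            .sched_deadline = 30 * 1000 * 1000,     /* 30 ms  */
            .sched_period   = 100 * 1000 * 1000,    /* 100 ms */
        };

        if (syscall(SYS_sched_setattr, 0, &attr, 0) < 0) {
            perror("sched_setattr");    /* e.g. admission control refused */
            return 1;
        }
        /* ... periodic real-time work would run here ... */
        return 0;
    }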
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 9d0ac091a52a..4a905bd667e2 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -23,8 +23,7 @@ | |||
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | 26 | #include <linux/uaccess.h> | |
27 | #include <asm/uaccess.h> | ||
28 | 27 | ||
29 | extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); | 28 | extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); |
30 | 29 | ||
@@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, | |||
107 | 106 | ||
108 | /* If we're in an interrupt context, or have no user context, | 107 | /* If we're in an interrupt context, or have no user context, |
109 | we must not take the fault. */ | 108 | we must not take the fault. */ |
110 | if (!mm || in_atomic()) | 109 | if (!mm || faulthandler_disabled()) |
111 | goto no_context; | 110 | goto no_context; |
112 | 111 | ||
113 | #ifdef CONFIG_ALPHA_LARGE_VMALLOC | 112 | #ifdef CONFIG_ALPHA_LARGE_VMALLOC |
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 4dc64ddebece..05b5aaf5b0f9 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
53 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | 53 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) |
54 | return -EFAULT; | 54 | return -EFAULT; |
55 | 55 | ||
56 | pagefault_disable(); /* implies preempt_disable() */ | 56 | pagefault_disable(); |
57 | 57 | ||
58 | switch (op) { | 58 | switch (op) { |
59 | case FUTEX_OP_SET: | 59 | case FUTEX_OP_SET: |
@@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
75 | ret = -ENOSYS; | 75 | ret = -ENOSYS; |
76 | } | 76 | } |
77 | 77 | ||
78 | pagefault_enable(); /* subsumes preempt_enable() */ | 78 | pagefault_enable(); |
79 | 79 | ||
80 | if (!ret) { | 80 | if (!ret) { |
81 | switch (cmp) { | 81 | switch (cmp) { |
@@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
104 | return ret; | 104 | return ret; |
105 | } | 105 | } |
106 | 106 | ||
107 | /* Compare-xchg with preemption disabled. | 107 | /* Compare-xchg with pagefaults disabled. |
108 | * Notes: | 108 | * Notes: |
109 | * -Best-Effort: Exchg happens only if compare succeeds. | 109 | * -Best-Effort: Exchg happens only if compare succeeds. |
110 | * If compare fails, returns; leaving retry/looping to upper layers | 110 | * If compare fails, returns; leaving retry/looping to upper layers |
@@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, | |||
121 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | 121 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) |
122 | return -EFAULT; | 122 | return -EFAULT; |
123 | 123 | ||
124 | pagefault_disable(); /* implies preempt_disable() */ | 124 | pagefault_disable(); |
125 | 125 | ||
126 | /* TBD : can use llock/scond */ | 126 | /* TBD : can use llock/scond */ |
127 | __asm__ __volatile__( | 127 | __asm__ __volatile__( |
@@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, | |||
142 | : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) | 142 | : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) |
143 | : "cc", "memory"); | 143 | : "cc", "memory"); |
144 | 144 | ||
145 | pagefault_enable(); /* subsumes preempt_enable() */ | 145 | pagefault_enable(); |
146 | 146 | ||
147 | *uval = val; | 147 | *uval = val; |
148 | return val; | 148 | return val; |
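The dropped "/* implies preempt_disable() */" comments reflect the same decoupling sketched after the diffstat above. For illustration (a hypothetical helper, not from this series), the generic pattern these futex routines follow:

    #include <linux/uaccess.h>

    /* Illustrative pattern: read a user word from a context that must not
     * sleep.  With pagefaults disabled, a missing page makes
     * __copy_from_user_inatomic() fail with a non-zero return instead of
     * sleeping in the fault handler; after this series the section is still
     * preemptible unless the caller disables preemption itself. */
    static int peek_user_word(u32 __user *uaddr, u32 *val)
    {
        int ret;

        pagefault_disable();
        ret = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
        pagefault_enable();

        return ret ? -EFAULT : 0;
    }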
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 6a2e006cbcce..d948e4e9d89c 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) | |||
86 | * If we're in an interrupt or have no user | 86 | * If we're in an interrupt or have no user |
87 | * context, we must not take the fault.. | 87 | * context, we must not take the fault.. |
88 | */ | 88 | */ |
89 | if (in_atomic() || !mm) | 89 | if (faulthandler_disabled() || !mm) |
90 | goto no_context; | 90 | goto no_context; |
91 | 91 | ||
92 | if (user_mode(regs)) | 92 | if (user_mode(regs)) |
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 4e78065a16aa..5eed82809d82 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
93 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | 93 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
94 | return -EFAULT; | 94 | return -EFAULT; |
95 | 95 | ||
96 | preempt_disable(); | ||
96 | __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" | 97 | __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" |
97 | "1: " TUSER(ldr) " %1, [%4]\n" | 98 | "1: " TUSER(ldr) " %1, [%4]\n" |
98 | " teq %1, %2\n" | 99 | " teq %1, %2\n" |
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
104 | : "cc", "memory"); | 105 | : "cc", "memory"); |
105 | 106 | ||
106 | *uval = val; | 107 | *uval = val; |
108 | preempt_enable(); | ||
109 | |||
107 | return ret; | 110 | return ret; |
108 | } | 111 | } |
109 | 112 | ||
@@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
124 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | 127 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
125 | return -EFAULT; | 128 | return -EFAULT; |
126 | 129 | ||
127 | pagefault_disable(); /* implies preempt_disable() */ | 130 | #ifndef CONFIG_SMP |
131 | preempt_disable(); | ||
132 | #endif | ||
133 | pagefault_disable(); | ||
128 | 134 | ||
129 | switch (op) { | 135 | switch (op) { |
130 | case FUTEX_OP_SET: | 136 | case FUTEX_OP_SET: |
@@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
146 | ret = -ENOSYS; | 152 | ret = -ENOSYS; |
147 | } | 153 | } |
148 | 154 | ||
149 | pagefault_enable(); /* subsumes preempt_enable() */ | 155 | pagefault_enable(); |
156 | #ifndef CONFIG_SMP | ||
157 | preempt_enable(); | ||
158 | #endif | ||
150 | 159 | ||
151 | if (!ret) { | 160 | if (!ret) { |
152 | switch (cmp) { | 161 | switch (cmp) { |
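On !CONFIG_SMP ARM the futex operation above is a plain user-space load, operation and store, so it is only atomic with respect to other tasks while preemption is off; before this series that came for free from pagefault_disable(). A tiny illustrative (hypothetical) example of the same idea:

    /* Illustrative only: on a uniprocessor, a non-atomic read-modify-write is
     * safe against other *tasks* while preemption is disabled (interrupt
     * handlers are a separate concern).  This mirrors why the UP futex path
     * above now takes preempt_disable()/preempt_enable() explicitly. */
    static void up_safe_add(int *counter, int delta)
    {
        preempt_disable();
        *counter += delta;      /* no other task can interleave on this CPU */
        preempt_enable();
    }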
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 2fe85fff5cca..370f7a732900 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; | |||
18 | #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) | 18 | #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) |
19 | #define topology_core_id(cpu) (cpu_topology[cpu].core_id) | 19 | #define topology_core_id(cpu) (cpu_topology[cpu].core_id) |
20 | #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) | 20 | #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) |
21 | #define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) | 21 | #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) |
22 | 22 | ||
23 | void init_cpu_topology(void); | 23 | void init_cpu_topology(void); |
24 | void store_cpu_topology(unsigned int cpuid); | 24 | void store_cpu_topology(unsigned int cpuid); |
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 6333d9c17875..0d629b8f973f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |||
276 | * If we're in an interrupt or have no user | 276 | * If we're in an interrupt or have no user |
277 | * context, we must not take the fault.. | 277 | * context, we must not take the fault.. |
278 | */ | 278 | */ |
279 | if (in_atomic() || !mm) | 279 | if (faulthandler_disabled() || !mm) |
280 | goto no_context; | 280 | goto no_context; |
281 | 281 | ||
282 | if (user_mode(regs)) | 282 | if (user_mode(regs)) |
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index b98895d9fe57..ee8dfa793989 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page) | |||
59 | void *kmap; | 59 | void *kmap; |
60 | int type; | 60 | int type; |
61 | 61 | ||
62 | preempt_disable(); | ||
62 | pagefault_disable(); | 63 | pagefault_disable(); |
63 | if (!PageHighMem(page)) | 64 | if (!PageHighMem(page)) |
64 | return page_address(page); | 65 | return page_address(page); |
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr) | |||
121 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | 122 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); |
122 | } | 123 | } |
123 | pagefault_enable(); | 124 | pagefault_enable(); |
125 | preempt_enable(); | ||
124 | } | 126 | } |
125 | EXPORT_SYMBOL(__kunmap_atomic); | 127 | EXPORT_SYMBOL(__kunmap_atomic); |
126 | 128 | ||
@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn) | |||
130 | int idx, type; | 132 | int idx, type; |
131 | struct page *page = pfn_to_page(pfn); | 133 | struct page *page = pfn_to_page(pfn); |
132 | 134 | ||
135 | preempt_disable(); | ||
133 | pagefault_disable(); | 136 | pagefault_disable(); |
134 | if (!PageHighMem(page)) | 137 | if (!PageHighMem(page)) |
135 | return page_address(page); | 138 | return page_address(page); |
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f750dc96e0f..74069b3bd919 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
58 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | 58 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
59 | return -EFAULT; | 59 | return -EFAULT; |
60 | 60 | ||
61 | pagefault_disable(); /* implies preempt_disable() */ | 61 | pagefault_disable(); |
62 | 62 | ||
63 | switch (op) { | 63 | switch (op) { |
64 | case FUTEX_OP_SET: | 64 | case FUTEX_OP_SET: |
@@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
85 | ret = -ENOSYS; | 85 | ret = -ENOSYS; |
86 | } | 86 | } |
87 | 87 | ||
88 | pagefault_enable(); /* subsumes preempt_enable() */ | 88 | pagefault_enable(); |
89 | 89 | ||
90 | if (!ret) { | 90 | if (!ret) { |
91 | switch (cmp) { | 91 | switch (cmp) { |
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 7ebcd31ce51c..225ec3524fbf 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -18,7 +18,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; | |||
18 | #define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) | 18 | #define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) |
19 | #define topology_core_id(cpu) (cpu_topology[cpu].core_id) | 19 | #define topology_core_id(cpu) (cpu_topology[cpu].core_id) |
20 | #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) | 20 | #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) |
21 | #define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) | 21 | #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) |
22 | 22 | ||
23 | void init_cpu_topology(void); | 23 | void init_cpu_topology(void); |
24 | void store_cpu_topology(unsigned int cpuid); | 24 | void store_cpu_topology(unsigned int cpuid); |
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 96da13167d4a..0948d327d013 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, | |||
211 | * If we're in an interrupt or have no user context, we must not take | 211 | * If we're in an interrupt or have no user context, we must not take |
212 | * the fault. | 212 | * the fault. |
213 | */ | 213 | */ |
214 | if (in_atomic() || !mm) | 214 | if (faulthandler_disabled() || !mm) |
215 | goto no_context; | 215 | goto no_context; |
216 | 216 | ||
217 | if (user_mode(regs)) | 217 | if (user_mode(regs)) |
diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h
index a46f7cf3e1ea..68cf638faf48 100644
--- a/arch/avr32/include/asm/uaccess.h
+++ b/arch/avr32/include/asm/uaccess.h
@@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_from_user(void *to, | |||
97 | * @x: Value to copy to user space. | 97 | * @x: Value to copy to user space. |
98 | * @ptr: Destination address, in user space. | 98 | * @ptr: Destination address, in user space. |
99 | * | 99 | * |
100 | * Context: User context only. This function may sleep. | 100 | * Context: User context only. This function may sleep if pagefaults are |
101 | * enabled. | ||
101 | * | 102 | * |
102 | * This macro copies a single simple value from kernel space to user | 103 | * This macro copies a single simple value from kernel space to user |
103 | * space. It supports simple types like char and int, but not larger | 104 | * space. It supports simple types like char and int, but not larger |
@@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_from_user(void *to, | |||
116 | * @x: Variable to store result. | 117 | * @x: Variable to store result. |
117 | * @ptr: Source address, in user space. | 118 | * @ptr: Source address, in user space. |
118 | * | 119 | * |
119 | * Context: User context only. This function may sleep. | 120 | * Context: User context only. This function may sleep if pagefaults are |
121 | * enabled. | ||
120 | * | 122 | * |
121 | * This macro copies a single simple variable from user space to kernel | 123 | * This macro copies a single simple variable from user space to kernel |
122 | * space. It supports simple types like char and int, but not larger | 124 | * space. It supports simple types like char and int, but not larger |
@@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_from_user(void *to, | |||
136 | * @x: Value to copy to user space. | 138 | * @x: Value to copy to user space. |
137 | * @ptr: Destination address, in user space. | 139 | * @ptr: Destination address, in user space. |
138 | * | 140 | * |
139 | * Context: User context only. This function may sleep. | 141 | * Context: User context only. This function may sleep if pagefaults are |
142 | * enabled. | ||
140 | * | 143 | * |
141 | * This macro copies a single simple value from kernel space to user | 144 | * This macro copies a single simple value from kernel space to user |
142 | * space. It supports simple types like char and int, but not larger | 145 | * space. It supports simple types like char and int, but not larger |
@@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_from_user(void *to, | |||
158 | * @x: Variable to store result. | 161 | * @x: Variable to store result. |
159 | * @ptr: Source address, in user space. | 162 | * @ptr: Source address, in user space. |
160 | * | 163 | * |
161 | * Context: User context only. This function may sleep. | 164 | * Context: User context only. This function may sleep if pagefaults are |
165 | * enabled. | ||
162 | * | 166 | * |
163 | * This macro copies a single simple variable from user space to kernel | 167 | * This macro copies a single simple variable from user space to kernel |
164 | * space. It supports simple types like char and int, but not larger | 168 | * space. It supports simple types like char and int, but not larger |
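The kernel-doc wording added here ("may sleep if pagefaults are enabled") applies to every get_user()/put_user()/copy_*_user() variant touched in this series. For illustration (a hypothetical helper, not part of the patch):

    #include <linux/uaccess.h>
    #include <linux/errno.h>

    /* Illustrative only: get_user()/put_user() in plain process context, where
     * pagefaults are enabled and the access may sleep while the page is
     * faulted in.  The same calls issued under pagefault_disable() would
     * return -EFAULT instead of sleeping, which is what the updated
     * kernel-doc is spelling out. */
    static long double_user_value(int __user *uptr)
    {
        int val;

        if (get_user(val, uptr))        /* may sleep here */
            return -EFAULT;

        if (put_user(val * 2, uptr))    /* may sleep here as well */
            return -EFAULT;

        return 0;
    }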
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index d223a8b57c1e..c03533937a9f 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -14,11 +14,11 @@ | |||
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/kdebug.h> | 15 | #include <linux/kdebug.h> |
16 | #include <linux/kprobes.h> | 16 | #include <linux/kprobes.h> |
17 | #include <linux/uaccess.h> | ||
17 | 18 | ||
18 | #include <asm/mmu_context.h> | 19 | #include <asm/mmu_context.h> |
19 | #include <asm/sysreg.h> | 20 | #include <asm/sysreg.h> |
20 | #include <asm/tlb.h> | 21 | #include <asm/tlb.h> |
21 | #include <asm/uaccess.h> | ||
22 | 22 | ||
23 | #ifdef CONFIG_KPROBES | 23 | #ifdef CONFIG_KPROBES |
24 | static inline int notify_page_fault(struct pt_regs *regs, int trap) | 24 | static inline int notify_page_fault(struct pt_regs *regs, int trap) |
@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) | |||
81 | * If we're in an interrupt or have no user context, we must | 81 | * If we're in an interrupt or have no user context, we must |
82 | * not take the fault... | 82 | * not take the fault... |
83 | */ | 83 | */ |
84 | if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) | 84 | if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM)) |
85 | goto no_context; | 85 | goto no_context; |
86 | 86 | ||
87 | local_irq_enable(); | 87 | local_irq_enable(); |
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 83f12f2ed9e3..3066d40a6db1 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/interrupt.h> | 8 | #include <linux/interrupt.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
11 | #include <asm/uaccess.h> | 11 | #include <linux/uaccess.h> |
12 | #include <arch/system.h> | 12 | #include <arch/system.h> |
13 | 13 | ||
14 | extern int find_fixup_code(struct pt_regs *); | 14 | extern int find_fixup_code(struct pt_regs *); |
@@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs, | |||
109 | info.si_code = SEGV_MAPERR; | 109 | info.si_code = SEGV_MAPERR; |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * If we're in an interrupt or "atomic" operation or have no | 112 | * If we're in an interrupt, have pagefaults disabled or have no |
113 | * user context, we must not take the fault. | 113 | * user context, we must not take the fault. |
114 | */ | 114 | */ |
115 | 115 | ||
116 | if (in_atomic() || !mm) | 116 | if (faulthandler_disabled() || !mm) |
117 | goto no_context; | 117 | goto no_context; |
118 | 118 | ||
119 | if (user_mode(regs)) | 119 | if (user_mode(regs)) |
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index ec4917ddf678..61d99767fe16 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -19,9 +19,9 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/ptrace.h> | 20 | #include <linux/ptrace.h> |
21 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
22 | #include <linux/uaccess.h> | ||
22 | 23 | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/gdb-stub.h> | 25 | #include <asm/gdb-stub.h> |
26 | 26 | ||
27 | /*****************************************************************************/ | 27 | /*****************************************************************************/ |
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear | |||
78 | * If we're in an interrupt or have no user | 78 | * If we're in an interrupt or have no user |
79 | * context, we must not take the fault.. | 79 | * context, we must not take the fault.. |
80 | */ | 80 | */ |
81 | if (in_atomic() || !mm) | 81 | if (faulthandler_disabled() || !mm) |
82 | goto no_context; | 82 | goto no_context; |
83 | 83 | ||
84 | if (user_mode(__frame)) | 84 | if (user_mode(__frame)) |
diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c
index bed9a9bd3c10..785344bbdc07 100644
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) | |||
42 | unsigned long paddr; | 42 | unsigned long paddr; |
43 | int type; | 43 | int type; |
44 | 44 | ||
45 | preempt_disable(); | ||
45 | pagefault_disable(); | 46 | pagefault_disable(); |
46 | type = kmap_atomic_idx_push(); | 47 | type = kmap_atomic_idx_push(); |
47 | paddr = page_to_phys(page); | 48 | paddr = page_to_phys(page); |
@@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr) | |||
85 | } | 86 | } |
86 | kmap_atomic_idx_pop(); | 87 | kmap_atomic_idx_pop(); |
87 | pagefault_enable(); | 88 | pagefault_enable(); |
89 | preempt_enable(); | ||
88 | } | 90 | } |
89 | EXPORT_SYMBOL(__kunmap_atomic); | 91 | EXPORT_SYMBOL(__kunmap_atomic); |
diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
index e4127e4d6a5b..f000a382bc7f 100644
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -36,7 +36,8 @@ | |||
36 | * @addr: User space pointer to start of block to check | 36 | * @addr: User space pointer to start of block to check |
37 | * @size: Size of block to check | 37 | * @size: Size of block to check |
38 | * | 38 | * |
39 | * Context: User context only. This function may sleep. | 39 | * Context: User context only. This function may sleep if pagefaults are |
40 | * enabled. | ||
40 | * | 41 | * |
41 | * Checks if a pointer to a block of memory in user space is valid. | 42 | * Checks if a pointer to a block of memory in user space is valid. |
42 | * | 43 | * |
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 6437ca21f61b..3ad8f6988363 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -53,7 +53,7 @@ void build_cpu_to_node_map(void); | |||
53 | #define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) | 53 | #define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) |
54 | #define topology_core_id(cpu) (cpu_data(cpu)->core_id) | 54 | #define topology_core_id(cpu) (cpu_data(cpu)->core_id) |
55 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) | 55 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) |
56 | #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) | 56 | #define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | extern void arch_fix_phys_package_id(int num, u32 slot); | 59 | extern void arch_fix_phys_package_id(int num, u32 slot); |
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index ba5ba7accd0d..70b40d1205a6 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -11,10 +11,10 @@ | |||
11 | #include <linux/kprobes.h> | 11 | #include <linux/kprobes.h> |
12 | #include <linux/kdebug.h> | 12 | #include <linux/kdebug.h> |
13 | #include <linux/prefetch.h> | 13 | #include <linux/prefetch.h> |
14 | #include <linux/uaccess.h> | ||
14 | 15 | ||
15 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
17 | #include <asm/uaccess.h> | ||
18 | 18 | ||
19 | extern int die(char *, struct pt_regs *, long); | 19 | extern int die(char *, struct pt_regs *, long); |
20 | 20 | ||
@@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re | |||
96 | /* | 96 | /* |
97 | * If we're in an interrupt or have no user context, we must not take the fault.. | 97 | * If we're in an interrupt or have no user context, we must not take the fault.. |
98 | */ | 98 | */ |
99 | if (in_atomic() || !mm) | 99 | if (faulthandler_disabled() || !mm) |
100 | goto no_context; | 100 | goto no_context; |
101 | 101 | ||
102 | #ifdef CONFIG_VIRTUAL_MEM_MAP | 102 | #ifdef CONFIG_VIRTUAL_MEM_MAP |
diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
index 71adff209405..cac7014daef3 100644
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s) | |||
91 | * @addr: User space pointer to start of block to check | 91 | * @addr: User space pointer to start of block to check |
92 | * @size: Size of block to check | 92 | * @size: Size of block to check |
93 | * | 93 | * |
94 | * Context: User context only. This function may sleep. | 94 | * Context: User context only. This function may sleep if pagefaults are |
95 | * enabled. | ||
95 | * | 96 | * |
96 | * Checks if a pointer to a block of memory in user space is valid. | 97 | * Checks if a pointer to a block of memory in user space is valid. |
97 | * | 98 | * |
@@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs); | |||
155 | * @x: Variable to store result. | 156 | * @x: Variable to store result. |
156 | * @ptr: Source address, in user space. | 157 | * @ptr: Source address, in user space. |
157 | * | 158 | * |
158 | * Context: User context only. This function may sleep. | 159 | * Context: User context only. This function may sleep if pagefaults are |
160 | * enabled. | ||
159 | * | 161 | * |
160 | * This macro copies a single simple variable from user space to kernel | 162 | * This macro copies a single simple variable from user space to kernel |
161 | * space. It supports simple types like char and int, but not larger | 163 | * space. It supports simple types like char and int, but not larger |
@@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs); | |||
175 | * @x: Value to copy to user space. | 177 | * @x: Value to copy to user space. |
176 | * @ptr: Destination address, in user space. | 178 | * @ptr: Destination address, in user space. |
177 | * | 179 | * |
178 | * Context: User context only. This function may sleep. | 180 | * Context: User context only. This function may sleep if pagefaults are |
181 | * enabled. | ||
179 | * | 182 | * |
180 | * This macro copies a single simple value from kernel space to user | 183 | * This macro copies a single simple value from kernel space to user |
181 | * space. It supports simple types like char and int, but not larger | 184 | * space. It supports simple types like char and int, but not larger |
@@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs); | |||
194 | * @x: Variable to store result. | 197 | * @x: Variable to store result. |
195 | * @ptr: Source address, in user space. | 198 | * @ptr: Source address, in user space. |
196 | * | 199 | * |
197 | * Context: User context only. This function may sleep. | 200 | * Context: User context only. This function may sleep if pagefaults are |
201 | * enabled. | ||
198 | * | 202 | * |
199 | * This macro copies a single simple variable from user space to kernel | 203 | * This macro copies a single simple variable from user space to kernel |
200 | * space. It supports simple types like char and int, but not larger | 204 | * space. It supports simple types like char and int, but not larger |
@@ -274,7 +278,8 @@ do { \ | |||
274 | * @x: Value to copy to user space. | 278 | * @x: Value to copy to user space. |
275 | * @ptr: Destination address, in user space. | 279 | * @ptr: Destination address, in user space. |
276 | * | 280 | * |
277 | * Context: User context only. This function may sleep. | 281 | * Context: User context only. This function may sleep if pagefaults are |
282 | * enabled. | ||
278 | * | 283 | * |
279 | * This macro copies a single simple value from kernel space to user | 284 | * This macro copies a single simple value from kernel space to user |
280 | * space. It supports simple types like char and int, but not larger | 285 | * space. It supports simple types like char and int, but not larger |
@@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon | |||
568 | * @from: Source address, in kernel space. | 573 | * @from: Source address, in kernel space. |
569 | * @n: Number of bytes to copy. | 574 | * @n: Number of bytes to copy. |
570 | * | 575 | * |
571 | * Context: User context only. This function may sleep. | 576 | * Context: User context only. This function may sleep if pagefaults are |
577 | * enabled. | ||
572 | * | 578 | * |
573 | * Copy data from kernel space to user space. Caller must check | 579 | * Copy data from kernel space to user space. Caller must check |
574 | * the specified block with access_ok() before calling this function. | 580 | * the specified block with access_ok() before calling this function. |
@@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon | |||
588 | * @from: Source address, in kernel space. | 594 | * @from: Source address, in kernel space. |
589 | * @n: Number of bytes to copy. | 595 | * @n: Number of bytes to copy. |
590 | * | 596 | * |
591 | * Context: User context only. This function may sleep. | 597 | * Context: User context only. This function may sleep if pagefaults are |
598 | * enabled. | ||
592 | * | 599 | * |
593 | * Copy data from kernel space to user space. | 600 | * Copy data from kernel space to user space. |
594 | * | 601 | * |
@@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon | |||
606 | * @from: Source address, in user space. | 613 | * @from: Source address, in user space. |
607 | * @n: Number of bytes to copy. | 614 | * @n: Number of bytes to copy. |
608 | * | 615 | * |
609 | * Context: User context only. This function may sleep. | 616 | * Context: User context only. This function may sleep if pagefaults are |
617 | * enabled. | ||
610 | * | 618 | * |
611 | * Copy data from user space to kernel space. Caller must check | 619 | * Copy data from user space to kernel space. Caller must check |
612 | * the specified block with access_ok() before calling this function. | 620 | * the specified block with access_ok() before calling this function. |
@@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon | |||
626 | * @from: Source address, in user space. | 634 | * @from: Source address, in user space. |
627 | * @n: Number of bytes to copy. | 635 | * @n: Number of bytes to copy. |
628 | * | 636 | * |
629 | * Context: User context only. This function may sleep. | 637 | * Context: User context only. This function may sleep if pagefaults are |
638 | * enabled. | ||
630 | * | 639 | * |
631 | * Copy data from user space to kernel space. | 640 | * Copy data from user space to kernel space. |
632 | * | 641 | * |
@@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len); | |||
677 | * strlen_user: - Get the size of a string in user space. | 686 | * strlen_user: - Get the size of a string in user space. |
678 | * @str: The string to measure. | 687 | * @str: The string to measure. |
679 | * | 688 | * |
680 | * Context: User context only. This function may sleep. | 689 | * Context: User context only. This function may sleep if pagefaults are |
690 | * enabled. | ||
681 | * | 691 | * |
682 | * Get the size of a NUL-terminated string in user space. | 692 | * Get the size of a NUL-terminated string in user space. |
683 | * | 693 | * |
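The reworded kerneldoc above ("may sleep if pagefaults are enabled") is applied to every uaccess helper touched by this merge: the same helpers may legitimately be called under pagefault_disable(), in which case they do not sleep but fail fast instead of faulting pages in. A hedged sketch of the two calling conventions; the buffer handling is illustrative:

#include <linux/uaccess.h>

/* Ordinary user context: the copy may sleep while faulting pages in. */
static unsigned long copy_blocking(void *dst, const void __user *src,
				   unsigned long n)
{
	return copy_from_user(dst, src, n);	/* returns bytes left uncopied */
}

/* With pagefaults disabled the copy cannot sleep; it simply returns early. */
static unsigned long copy_nonsleeping(void *dst, const void __user *src,
				      unsigned long n)
{
	unsigned long uncopied;

	pagefault_disable();
	uncopied = __copy_from_user_inatomic(dst, src, n);
	pagefault_enable();

	return uncopied;
}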
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e3d4d4890104..8f9875b7933d 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c | |||
@@ -24,9 +24,9 @@ | |||
24 | #include <linux/vt_kern.h> /* For unblank_screen() */ | 24 | #include <linux/vt_kern.h> /* For unblank_screen() */ |
25 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | #include <linux/uaccess.h> | ||
27 | 28 | ||
28 | #include <asm/m32r.h> | 29 | #include <asm/m32r.h> |
29 | #include <asm/uaccess.h> | ||
30 | #include <asm/hardirq.h> | 30 | #include <asm/hardirq.h> |
31 | #include <asm/mmu_context.h> | 31 | #include <asm/mmu_context.h> |
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
@@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
111 | mm = tsk->mm; | 111 | mm = tsk->mm; |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * If we're in an interrupt or have no user context or are running in an | 114 | * If we're in an interrupt or have no user context or have pagefaults |
115 | * atomic region then we must not take the fault.. | 115 | * disabled then we must not take the fault. |
116 | */ | 116 | */ |
117 | if (in_atomic() || !mm) | 117 | if (faulthandler_disabled() || !mm) |
118 | goto bad_area_nosemaphore; | 118 | goto bad_area_nosemaphore; |
119 | 119 | ||
120 | if (error_code & ACE_USERMODE) | 120 | if (error_code & ACE_USERMODE) |
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h index a823cd73dc09..b5941818346f 100644 --- a/arch/m68k/include/asm/irqflags.h +++ b/arch/m68k/include/asm/irqflags.h | |||
@@ -2,9 +2,6 @@ | |||
2 | #define _M68K_IRQFLAGS_H | 2 | #define _M68K_IRQFLAGS_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #ifdef CONFIG_MMU | ||
6 | #include <linux/preempt_mask.h> | ||
7 | #endif | ||
8 | #include <linux/preempt.h> | 5 | #include <linux/preempt.h> |
9 | #include <asm/thread_info.h> | 6 | #include <asm/thread_info.h> |
10 | #include <asm/entry.h> | 7 | #include <asm/entry.h> |
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index b2f04aee46ec..6a94cdd0c830 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c | |||
@@ -10,10 +10,10 @@ | |||
10 | #include <linux/ptrace.h> | 10 | #include <linux/ptrace.h> |
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/uaccess.h> | ||
13 | 14 | ||
14 | #include <asm/setup.h> | 15 | #include <asm/setup.h> |
15 | #include <asm/traps.h> | 16 | #include <asm/traps.h> |
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/pgalloc.h> | 17 | #include <asm/pgalloc.h> |
18 | 18 | ||
19 | extern void die_if_kernel(char *, struct pt_regs *, long); | 19 | extern void die_if_kernel(char *, struct pt_regs *, long); |
@@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, | |||
81 | * If we're in an interrupt or have no user | 81 | * If we're in an interrupt or have no user |
82 | * context, we must not take the fault.. | 82 | * context, we must not take the fault.. |
83 | */ | 83 | */ |
84 | if (in_atomic() || !mm) | 84 | if (faulthandler_disabled() || !mm) |
85 | goto no_context; | 85 | goto no_context; |
86 | 86 | ||
87 | if (user_mode(regs)) | 87 | if (user_mode(regs)) |
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2de5dc695a87..f57edca63609 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c | |||
@@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, | |||
105 | 105 | ||
106 | mm = tsk->mm; | 106 | mm = tsk->mm; |
107 | 107 | ||
108 | if (in_atomic() || !mm) | 108 | if (faulthandler_disabled() || !mm) |
109 | goto no_context; | 109 | goto no_context; |
110 | 110 | ||
111 | if (user_mode(regs)) | 111 | if (user_mode(regs)) |
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c index d71f621a2c0b..807f1b1c4e65 100644 --- a/arch/metag/mm/highmem.c +++ b/arch/metag/mm/highmem.c | |||
@@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page) | |||
43 | unsigned long vaddr; | 43 | unsigned long vaddr; |
44 | int type; | 44 | int type; |
45 | 45 | ||
46 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 46 | preempt_disable(); |
47 | pagefault_disable(); | 47 | pagefault_disable(); |
48 | if (!PageHighMem(page)) | 48 | if (!PageHighMem(page)) |
49 | return page_address(page); | 49 | return page_address(page); |
@@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr) | |||
82 | } | 82 | } |
83 | 83 | ||
84 | pagefault_enable(); | 84 | pagefault_enable(); |
85 | preempt_enable(); | ||
85 | } | 86 | } |
86 | EXPORT_SYMBOL(__kunmap_atomic); | 87 | EXPORT_SYMBOL(__kunmap_atomic); |
87 | 88 | ||
@@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn) | |||
95 | unsigned long vaddr; | 96 | unsigned long vaddr; |
96 | int type; | 97 | int type; |
97 | 98 | ||
99 | preempt_disable(); | ||
98 | pagefault_disable(); | 100 | pagefault_disable(); |
99 | 101 | ||
100 | type = kmap_atomic_idx_push(); | 102 | type = kmap_atomic_idx_push(); |
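Because pagefault_disable() no longer manipulates the preempt counter, every kmap_atomic()/kmap_atomic_pfn() implementation in this series (metag above, and microblaze, MIPS, mn10300, parisc, powerpc below) now pairs it with an explicit preempt_disable(), and the matching kunmap path re-enables both in reverse order. Callers are unchanged; a minimal usage sketch, with the copy itself being illustrative:

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy @len bytes out of a (possibly highmem) page. */
static void peek_page(struct page *page, void *buf, size_t len)
{
	void *vaddr = kmap_atomic(page);	/* disables preemption and pagefaults */

	memcpy(buf, vaddr, len);
	kunmap_atomic(vaddr);			/* re-enables both, innermost first */
}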
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 62942fd12672..331b0d35f89c 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h | |||
@@ -178,7 +178,8 @@ extern long __user_bad(void); | |||
178 | * @x: Variable to store result. | 178 | * @x: Variable to store result. |
179 | * @ptr: Source address, in user space. | 179 | * @ptr: Source address, in user space. |
180 | * | 180 | * |
181 | * Context: User context only. This function may sleep. | 181 | * Context: User context only. This function may sleep if pagefaults are |
182 | * enabled. | ||
182 | * | 183 | * |
183 | * This macro copies a single simple variable from user space to kernel | 184 | * This macro copies a single simple variable from user space to kernel |
184 | * space. It supports simple types like char and int, but not larger | 185 | * space. It supports simple types like char and int, but not larger |
@@ -290,7 +291,8 @@ extern long __user_bad(void); | |||
290 | * @x: Value to copy to user space. | 291 | * @x: Value to copy to user space. |
291 | * @ptr: Destination address, in user space. | 292 | * @ptr: Destination address, in user space. |
292 | * | 293 | * |
293 | * Context: User context only. This function may sleep. | 294 | * Context: User context only. This function may sleep if pagefaults are |
295 | * enabled. | ||
294 | * | 296 | * |
295 | * This macro copies a single simple value from kernel space to user | 297 | * This macro copies a single simple value from kernel space to user |
296 | * space. It supports simple types like char and int, but not larger | 298 | * space. It supports simple types like char and int, but not larger |
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index d46a5ebb7570..177dfc003643 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c | |||
@@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, | |||
107 | if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) | 107 | if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) |
108 | is_write = 0; | 108 | is_write = 0; |
109 | 109 | ||
110 | if (unlikely(in_atomic() || !mm)) { | 110 | if (unlikely(faulthandler_disabled() || !mm)) { |
111 | if (kernel_mode(regs)) | 111 | if (kernel_mode(regs)) |
112 | goto bad_area_nosemaphore; | 112 | goto bad_area_nosemaphore; |
113 | 113 | ||
114 | /* in_atomic() in user mode is really bad, | 114 | /* faulthandler_disabled() in user mode is really bad, |
115 | as is current->mm == NULL. */ | 115 | as is current->mm == NULL. */ |
116 | pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n", | 116 | pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n", |
117 | mm); | 117 | mm); |
118 | pr_emerg("r15 = %lx MSR = %lx\n", | 118 | pr_emerg("r15 = %lx MSR = %lx\n", |
119 | regs->r15, regs->msr); | 119 | regs->r15, regs->msr); |
120 | die("Weird page fault", regs, SIGSEGV); | 120 | die("Weird page fault", regs, SIGSEGV); |
diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c index 5a92576fad92..2fcc5a52d84d 100644 --- a/arch/microblaze/mm/highmem.c +++ b/arch/microblaze/mm/highmem.c | |||
@@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |||
37 | unsigned long vaddr; | 37 | unsigned long vaddr; |
38 | int idx, type; | 38 | int idx, type; |
39 | 39 | ||
40 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 40 | preempt_disable(); |
41 | pagefault_disable(); | 41 | pagefault_disable(); |
42 | if (!PageHighMem(page)) | 42 | if (!PageHighMem(page)) |
43 | return page_address(page); | 43 | return page_address(page); |
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr) | |||
63 | 63 | ||
64 | if (vaddr < __fix_to_virt(FIX_KMAP_END)) { | 64 | if (vaddr < __fix_to_virt(FIX_KMAP_END)) { |
65 | pagefault_enable(); | 65 | pagefault_enable(); |
66 | preempt_enable(); | ||
66 | return; | 67 | return; |
67 | } | 68 | } |
68 | 69 | ||
@@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr) | |||
84 | #endif | 85 | #endif |
85 | kmap_atomic_idx_pop(); | 86 | kmap_atomic_idx_pop(); |
86 | pagefault_enable(); | 87 | pagefault_enable(); |
88 | preempt_enable(); | ||
87 | } | 89 | } |
88 | EXPORT_SYMBOL(__kunmap_atomic); | 90 | EXPORT_SYMBOL(__kunmap_atomic); |
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 3e307ec2afba..7afda4150a59 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h | |||
@@ -15,7 +15,7 @@ | |||
15 | #define topology_physical_package_id(cpu) (cpu_data[cpu].package) | 15 | #define topology_physical_package_id(cpu) (cpu_data[cpu].package) |
16 | #define topology_core_id(cpu) (cpu_data[cpu].core) | 16 | #define topology_core_id(cpu) (cpu_data[cpu].core) |
17 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) | 17 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) |
18 | #define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu]) | 18 | #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) |
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | #endif /* __ASM_TOPOLOGY_H */ | 21 | #endif /* __ASM_TOPOLOGY_H */ |
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index bf8b32450ef6..9722357d2854 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h | |||
@@ -103,7 +103,8 @@ extern u64 __ua_limit; | |||
103 | * @addr: User space pointer to start of block to check | 103 | * @addr: User space pointer to start of block to check |
104 | * @size: Size of block to check | 104 | * @size: Size of block to check |
105 | * | 105 | * |
106 | * Context: User context only. This function may sleep. | 106 | * Context: User context only. This function may sleep if pagefaults are |
107 | * enabled. | ||
107 | * | 108 | * |
108 | * Checks if a pointer to a block of memory in user space is valid. | 109 | * Checks if a pointer to a block of memory in user space is valid. |
109 | * | 110 | * |
@@ -138,7 +139,8 @@ extern u64 __ua_limit; | |||
138 | * @x: Value to copy to user space. | 139 | * @x: Value to copy to user space. |
139 | * @ptr: Destination address, in user space. | 140 | * @ptr: Destination address, in user space. |
140 | * | 141 | * |
141 | * Context: User context only. This function may sleep. | 142 | * Context: User context only. This function may sleep if pagefaults are |
143 | * enabled. | ||
142 | * | 144 | * |
143 | * This macro copies a single simple value from kernel space to user | 145 | * This macro copies a single simple value from kernel space to user |
144 | * space. It supports simple types like char and int, but not larger | 146 | * space. It supports simple types like char and int, but not larger |
@@ -157,7 +159,8 @@ extern u64 __ua_limit; | |||
157 | * @x: Variable to store result. | 159 | * @x: Variable to store result. |
158 | * @ptr: Source address, in user space. | 160 | * @ptr: Source address, in user space. |
159 | * | 161 | * |
160 | * Context: User context only. This function may sleep. | 162 | * Context: User context only. This function may sleep if pagefaults are |
163 | * enabled. | ||
161 | * | 164 | * |
162 | * This macro copies a single simple variable from user space to kernel | 165 | * This macro copies a single simple variable from user space to kernel |
163 | * space. It supports simple types like char and int, but not larger | 166 | * space. It supports simple types like char and int, but not larger |
@@ -177,7 +180,8 @@ extern u64 __ua_limit; | |||
177 | * @x: Value to copy to user space. | 180 | * @x: Value to copy to user space. |
178 | * @ptr: Destination address, in user space. | 181 | * @ptr: Destination address, in user space. |
179 | * | 182 | * |
180 | * Context: User context only. This function may sleep. | 183 | * Context: User context only. This function may sleep if pagefaults are |
184 | * enabled. | ||
181 | * | 185 | * |
182 | * This macro copies a single simple value from kernel space to user | 186 | * This macro copies a single simple value from kernel space to user |
183 | * space. It supports simple types like char and int, but not larger | 187 | * space. It supports simple types like char and int, but not larger |
@@ -199,7 +203,8 @@ extern u64 __ua_limit; | |||
199 | * @x: Variable to store result. | 203 | * @x: Variable to store result. |
200 | * @ptr: Source address, in user space. | 204 | * @ptr: Source address, in user space. |
201 | * | 205 | * |
202 | * Context: User context only. This function may sleep. | 206 | * Context: User context only. This function may sleep if pagefaults are |
207 | * enabled. | ||
203 | * | 208 | * |
204 | * This macro copies a single simple variable from user space to kernel | 209 | * This macro copies a single simple variable from user space to kernel |
205 | * space. It supports simple types like char and int, but not larger | 210 | * space. It supports simple types like char and int, but not larger |
@@ -498,7 +503,8 @@ extern void __put_user_unknown(void); | |||
498 | * @x: Value to copy to user space. | 503 | * @x: Value to copy to user space. |
499 | * @ptr: Destination address, in user space. | 504 | * @ptr: Destination address, in user space. |
500 | * | 505 | * |
501 | * Context: User context only. This function may sleep. | 506 | * Context: User context only. This function may sleep if pagefaults are |
507 | * enabled. | ||
502 | * | 508 | * |
503 | * This macro copies a single simple value from kernel space to user | 509 | * This macro copies a single simple value from kernel space to user |
504 | * space. It supports simple types like char and int, but not larger | 510 | * space. It supports simple types like char and int, but not larger |
@@ -517,7 +523,8 @@ extern void __put_user_unknown(void); | |||
517 | * @x: Variable to store result. | 523 | * @x: Variable to store result. |
518 | * @ptr: Source address, in user space. | 524 | * @ptr: Source address, in user space. |
519 | * | 525 | * |
520 | * Context: User context only. This function may sleep. | 526 | * Context: User context only. This function may sleep if pagefaults are |
527 | * enabled. | ||
521 | * | 528 | * |
522 | * This macro copies a single simple variable from user space to kernel | 529 | * This macro copies a single simple variable from user space to kernel |
523 | * space. It supports simple types like char and int, but not larger | 530 | * space. It supports simple types like char and int, but not larger |
@@ -537,7 +544,8 @@ extern void __put_user_unknown(void); | |||
537 | * @x: Value to copy to user space. | 544 | * @x: Value to copy to user space. |
538 | * @ptr: Destination address, in user space. | 545 | * @ptr: Destination address, in user space. |
539 | * | 546 | * |
540 | * Context: User context only. This function may sleep. | 547 | * Context: User context only. This function may sleep if pagefaults are |
548 | * enabled. | ||
541 | * | 549 | * |
542 | * This macro copies a single simple value from kernel space to user | 550 | * This macro copies a single simple value from kernel space to user |
543 | * space. It supports simple types like char and int, but not larger | 551 | * space. It supports simple types like char and int, but not larger |
@@ -559,7 +567,8 @@ extern void __put_user_unknown(void); | |||
559 | * @x: Variable to store result. | 567 | * @x: Variable to store result. |
560 | * @ptr: Source address, in user space. | 568 | * @ptr: Source address, in user space. |
561 | * | 569 | * |
562 | * Context: User context only. This function may sleep. | 570 | * Context: User context only. This function may sleep if pagefaults are |
571 | * enabled. | ||
563 | * | 572 | * |
564 | * This macro copies a single simple variable from user space to kernel | 573 | * This macro copies a single simple variable from user space to kernel |
565 | * space. It supports simple types like char and int, but not larger | 574 | * space. It supports simple types like char and int, but not larger |
@@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n); | |||
815 | * @from: Source address, in kernel space. | 824 | * @from: Source address, in kernel space. |
816 | * @n: Number of bytes to copy. | 825 | * @n: Number of bytes to copy. |
817 | * | 826 | * |
818 | * Context: User context only. This function may sleep. | 827 | * Context: User context only. This function may sleep if pagefaults are |
828 | * enabled. | ||
819 | * | 829 | * |
820 | * Copy data from kernel space to user space. Caller must check | 830 | * Copy data from kernel space to user space. Caller must check |
821 | * the specified block with access_ok() before calling this function. | 831 | * the specified block with access_ok() before calling this function. |
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); | |||
888 | * @from: Source address, in kernel space. | 898 | * @from: Source address, in kernel space. |
889 | * @n: Number of bytes to copy. | 899 | * @n: Number of bytes to copy. |
890 | * | 900 | * |
891 | * Context: User context only. This function may sleep. | 901 | * Context: User context only. This function may sleep if pagefaults are |
902 | * enabled. | ||
892 | * | 903 | * |
893 | * Copy data from kernel space to user space. | 904 | * Copy data from kernel space to user space. |
894 | * | 905 | * |
@@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); | |||
1075 | * @from: Source address, in user space. | 1086 | * @from: Source address, in user space. |
1076 | * @n: Number of bytes to copy. | 1087 | * @n: Number of bytes to copy. |
1077 | * | 1088 | * |
1078 | * Context: User context only. This function may sleep. | 1089 | * Context: User context only. This function may sleep if pagefaults are |
1090 | * enabled. | ||
1079 | * | 1091 | * |
1080 | * Copy data from user space to kernel space. Caller must check | 1092 | * Copy data from user space to kernel space. Caller must check |
1081 | * the specified block with access_ok() before calling this function. | 1093 | * the specified block with access_ok() before calling this function. |
@@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); | |||
1107 | * @from: Source address, in user space. | 1119 | * @from: Source address, in user space. |
1108 | * @n: Number of bytes to copy. | 1120 | * @n: Number of bytes to copy. |
1109 | * | 1121 | * |
1110 | * Context: User context only. This function may sleep. | 1122 | * Context: User context only. This function may sleep if pagefaults are |
1123 | * enabled. | ||
1111 | * | 1124 | * |
1112 | * Copy data from user space to kernel space. | 1125 | * Copy data from user space to kernel space. |
1113 | * | 1126 | * |
@@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len) | |||
1329 | * strlen_user: - Get the size of a string in user space. | 1342 | * strlen_user: - Get the size of a string in user space. |
1330 | * @str: The string to measure. | 1343 | * @str: The string to measure. |
1331 | * | 1344 | * |
1332 | * Context: User context only. This function may sleep. | 1345 | * Context: User context only. This function may sleep if pagefaults are |
1346 | * enabled. | ||
1333 | * | 1347 | * |
1334 | * Get the size of a NUL-terminated string in user space. | 1348 | * Get the size of a NUL-terminated string in user space. |
1335 | * | 1349 | * |
@@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const char __user *s, long n) | |||
1398 | * strnlen_user: - Get the size of a string in user space. | 1412 | * strnlen_user: - Get the size of a string in user space. |
1399 | * @str: The string to measure. | 1413 | * @str: The string to measure. |
1400 | * | 1414 | * |
1401 | * Context: User context only. This function may sleep. | 1415 | * Context: User context only. This function may sleep if pagefaults are |
1416 | * enabled. | ||
1402 | * | 1417 | * |
1403 | * Get the size of a NUL-terminated string in user space. | 1418 | * Get the size of a NUL-terminated string in user space. |
1404 | * | 1419 | * |
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h index 06805e09bcd3..0b85f827cd18 100644 --- a/arch/mips/kernel/signal-common.h +++ b/arch/mips/kernel/signal-common.h | |||
@@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, | |||
28 | extern int fpcsr_pending(unsigned int __user *fpcsr); | 28 | extern int fpcsr_pending(unsigned int __user *fpcsr); |
29 | 29 | ||
30 | /* Make sure we will not lose FPU ownership */ | 30 | /* Make sure we will not lose FPU ownership */ |
31 | #ifdef CONFIG_PREEMPT | 31 | #define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) |
32 | #define lock_fpu_owner() preempt_disable() | 32 | #define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) |
33 | #define unlock_fpu_owner() preempt_enable() | ||
34 | #else | ||
35 | #define lock_fpu_owner() pagefault_disable() | ||
36 | #define unlock_fpu_owner() pagefault_enable() | ||
37 | #endif | ||
38 | 33 | ||
39 | #endif /* __SIGNAL_COMMON_H */ | 34 | #endif /* __SIGNAL_COMMON_H */ |
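The MIPS lock_fpu_owner() hunk above shows why the decoupling helps: the old macros picked preempt_disable() or pagefault_disable() depending on CONFIG_PREEMPT, relying on whichever one happened to both suppress preemption and be visible to the fault handler's in_atomic() check. With the two concerns separated, both are taken explicitly. Spelled out as inline functions (a sketch only; the kernel keeps these as macros):

#include <linux/preempt.h>
#include <linux/uaccess.h>

static inline void lock_fpu_owner_sketch(void)
{
	preempt_disable();	/* keep the FPU owner from being switched out */
	pagefault_disable();	/* and keep the fault handler from sleeping   */
}

static inline void unlock_fpu_owner_sketch(void)
{
	pagefault_enable();	/* release in reverse order */
	preempt_enable();
}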
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 7ff8637e530d..36c0f26fac6b 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c | |||
@@ -21,10 +21,10 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/kprobes.h> | 22 | #include <linux/kprobes.h> |
23 | #include <linux/perf_event.h> | 23 | #include <linux/perf_event.h> |
24 | #include <linux/uaccess.h> | ||
24 | 25 | ||
25 | #include <asm/branch.h> | 26 | #include <asm/branch.h> |
26 | #include <asm/mmu_context.h> | 27 | #include <asm/mmu_context.h> |
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/ptrace.h> | 28 | #include <asm/ptrace.h> |
29 | #include <asm/highmem.h> /* For VMALLOC_END */ | 29 | #include <asm/highmem.h> /* For VMALLOC_END */ |
30 | #include <linux/kdebug.h> | 30 | #include <linux/kdebug.h> |
@@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, | |||
94 | * If we're in an interrupt or have no user | 94 | * If we're in an interrupt or have no user |
95 | * context, we must not take the fault.. | 95 | * context, we must not take the fault.. |
96 | */ | 96 | */ |
97 | if (in_atomic() || !mm) | 97 | if (faulthandler_disabled() || !mm) |
98 | goto bad_area_nosemaphore; | 98 | goto bad_area_nosemaphore; |
99 | 99 | ||
100 | if (user_mode(regs)) | 100 | if (user_mode(regs)) |
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index da815d295239..11661cbc11a8 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c | |||
@@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page) | |||
47 | unsigned long vaddr; | 47 | unsigned long vaddr; |
48 | int idx, type; | 48 | int idx, type; |
49 | 49 | ||
50 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 50 | preempt_disable(); |
51 | pagefault_disable(); | 51 | pagefault_disable(); |
52 | if (!PageHighMem(page)) | 52 | if (!PageHighMem(page)) |
53 | return page_address(page); | 53 | return page_address(page); |
@@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr) | |||
72 | 72 | ||
73 | if (vaddr < FIXADDR_START) { // FIXME | 73 | if (vaddr < FIXADDR_START) { // FIXME |
74 | pagefault_enable(); | 74 | pagefault_enable(); |
75 | preempt_enable(); | ||
75 | return; | 76 | return; |
76 | } | 77 | } |
77 | 78 | ||
@@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr) | |||
92 | #endif | 93 | #endif |
93 | kmap_atomic_idx_pop(); | 94 | kmap_atomic_idx_pop(); |
94 | pagefault_enable(); | 95 | pagefault_enable(); |
96 | preempt_enable(); | ||
95 | } | 97 | } |
96 | EXPORT_SYMBOL(__kunmap_atomic); | 98 | EXPORT_SYMBOL(__kunmap_atomic); |
97 | 99 | ||
@@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn) | |||
104 | unsigned long vaddr; | 106 | unsigned long vaddr; |
105 | int idx, type; | 107 | int idx, type; |
106 | 108 | ||
109 | preempt_disable(); | ||
107 | pagefault_disable(); | 110 | pagefault_disable(); |
108 | 111 | ||
109 | type = kmap_atomic_idx_push(); | 112 | type = kmap_atomic_idx_push(); |
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index faa5c9822ecc..198a3147dd7d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c | |||
@@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) | |||
90 | 90 | ||
91 | BUG_ON(Page_dcache_dirty(page)); | 91 | BUG_ON(Page_dcache_dirty(page)); |
92 | 92 | ||
93 | preempt_disable(); | ||
93 | pagefault_disable(); | 94 | pagefault_disable(); |
94 | idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); | 95 | idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); |
95 | idx += in_interrupt() ? FIX_N_COLOURS : 0; | 96 | idx += in_interrupt() ? FIX_N_COLOURS : 0; |
@@ -152,6 +153,7 @@ void kunmap_coherent(void) | |||
152 | write_c0_entryhi(old_ctx); | 153 | write_c0_entryhi(old_ctx); |
153 | local_irq_restore(flags); | 154 | local_irq_restore(flags); |
154 | pagefault_enable(); | 155 | pagefault_enable(); |
156 | preempt_enable(); | ||
155 | } | 157 | } |
156 | 158 | ||
157 | void copy_user_highpage(struct page *to, struct page *from, | 159 | void copy_user_highpage(struct page *to, struct page *from, |
diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index 2fbbe4d920aa..1ddea5afba09 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h | |||
@@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |||
75 | unsigned long vaddr; | 75 | unsigned long vaddr; |
76 | int idx, type; | 76 | int idx, type; |
77 | 77 | ||
78 | preempt_disable(); | ||
78 | pagefault_disable(); | 79 | pagefault_disable(); |
79 | if (page < highmem_start_page) | 80 | if (page < highmem_start_page) |
80 | return page_address(page); | 81 | return page_address(page); |
@@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) | |||
98 | 99 | ||
99 | if (vaddr < FIXADDR_START) { /* FIXME */ | 100 | if (vaddr < FIXADDR_START) { /* FIXME */ |
100 | pagefault_enable(); | 101 | pagefault_enable(); |
102 | preempt_enable(); | ||
101 | return; | 103 | return; |
102 | } | 104 | } |
103 | 105 | ||
@@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) | |||
122 | 124 | ||
123 | kmap_atomic_idx_pop(); | 125 | kmap_atomic_idx_pop(); |
124 | pagefault_enable(); | 126 | pagefault_enable(); |
127 | preempt_enable(); | ||
125 | } | 128 | } |
126 | #endif /* __KERNEL__ */ | 129 | #endif /* __KERNEL__ */ |
127 | 130 | ||
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 0c2cc5d39c8e..4a1d181ed32f 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c | |||
@@ -23,8 +23,8 @@ | |||
23 | #include <linux/interrupt.h> | 23 | #include <linux/interrupt.h> |
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/vt_kern.h> /* For unblank_screen() */ | 25 | #include <linux/vt_kern.h> /* For unblank_screen() */ |
26 | #include <linux/uaccess.h> | ||
26 | 27 | ||
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/pgalloc.h> | 28 | #include <asm/pgalloc.h> |
29 | #include <asm/hardirq.h> | 29 | #include <asm/hardirq.h> |
30 | #include <asm/cpu-regs.h> | 30 | #include <asm/cpu-regs.h> |
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, | |||
168 | * If we're in an interrupt or have no user | 168 | * If we're in an interrupt or have no user |
169 | * context, we must not take the fault.. | 169 | * context, we must not take the fault.. |
170 | */ | 170 | */ |
171 | if (in_atomic() || !mm) | 171 | if (faulthandler_disabled() || !mm) |
172 | goto no_context; | 172 | goto no_context; |
173 | 173 | ||
174 | if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) | 174 | if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) |
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 0c9b6afe69e9..b51878b0c6b8 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c | |||
@@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, | |||
77 | * If we're in an interrupt or have no user | 77 | * If we're in an interrupt or have no user |
78 | * context, we must not take the fault.. | 78 | * context, we must not take the fault.. |
79 | */ | 79 | */ |
80 | if (in_atomic() || !mm) | 80 | if (faulthandler_disabled() || !mm) |
81 | goto bad_area_nosemaphore; | 81 | goto bad_area_nosemaphore; |
82 | 82 | ||
83 | if (user_mode(regs)) | 83 | if (user_mode(regs)) |
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index de65f66ea64e..ec2df4bab302 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h | |||
@@ -142,6 +142,7 @@ static inline void kunmap(struct page *page) | |||
142 | 142 | ||
143 | static inline void *kmap_atomic(struct page *page) | 143 | static inline void *kmap_atomic(struct page *page) |
144 | { | 144 | { |
145 | preempt_disable(); | ||
145 | pagefault_disable(); | 146 | pagefault_disable(); |
146 | return page_address(page); | 147 | return page_address(page); |
147 | } | 148 | } |
@@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void *addr) | |||
150 | { | 151 | { |
151 | flush_kernel_dcache_page_addr(addr); | 152 | flush_kernel_dcache_page_addr(addr); |
152 | pagefault_enable(); | 153 | pagefault_enable(); |
154 | preempt_enable(); | ||
153 | } | 155 | } |
154 | 156 | ||
155 | #define kmap_atomic_prot(page, prot) kmap_atomic(page) | 157 | #define kmap_atomic_prot(page, prot) kmap_atomic(page) |
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 47ee620d15d2..6548fd1d2e62 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c | |||
@@ -26,9 +26,9 @@ | |||
26 | #include <linux/console.h> | 26 | #include <linux/console.h> |
27 | #include <linux/bug.h> | 27 | #include <linux/bug.h> |
28 | #include <linux/ratelimit.h> | 28 | #include <linux/ratelimit.h> |
29 | #include <linux/uaccess.h> | ||
29 | 30 | ||
30 | #include <asm/assembly.h> | 31 | #include <asm/assembly.h> |
31 | #include <asm/uaccess.h> | ||
32 | #include <asm/io.h> | 32 | #include <asm/io.h> |
33 | #include <asm/irq.h> | 33 | #include <asm/irq.h> |
34 | #include <asm/traps.h> | 34 | #include <asm/traps.h> |
@@ -800,7 +800,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) | |||
800 | * unless pagefault_disable() was called before. | 800 | * unless pagefault_disable() was called before. |
801 | */ | 801 | */ |
802 | 802 | ||
803 | if (fault_space == 0 && !in_atomic()) | 803 | if (fault_space == 0 && !faulthandler_disabled()) |
804 | { | 804 | { |
805 | pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); | 805 | pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); |
806 | parisc_terminate("Kernel Fault", regs, code, fault_address); | 806 | parisc_terminate("Kernel Fault", regs, code, fault_address); |
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e5120e653240..15503adddf4f 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c | |||
@@ -15,8 +15,8 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/uaccess.h> | ||
18 | 19 | ||
19 | #include <asm/uaccess.h> | ||
20 | #include <asm/traps.h> | 20 | #include <asm/traps.h> |
21 | 21 | ||
22 | /* Various important other fields */ | 22 | /* Various important other fields */ |
@@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, | |||
207 | int fault; | 207 | int fault; |
208 | unsigned int flags; | 208 | unsigned int flags; |
209 | 209 | ||
210 | if (in_atomic()) | 210 | if (pagefault_disabled()) |
211 | goto no_context; | 211 | goto no_context; |
212 | 212 | ||
213 | tsk = current; | 213 | tsk = current; |
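Note that parisc above (and score and sparc32 further down) checks pagefault_disabled() rather than faulthandler_disabled(). The two helpers differ; paraphrasing the definitions this series adds to <linux/uaccess.h>, with the exact form here being an approximation rather than the real header:

#include <linux/preempt.h>
#include <linux/sched.h>

/* True only after an explicit pagefault_disable(). */
#define pagefault_disabled_sketch()	(current->pagefault_disabled != 0)

/* True with pagefaults disabled *or* while in atomic context. */
#define faulthandler_disabled_sketch()	\
	(pagefault_disabled_sketch() || in_atomic())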
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 5f1048eaa5b6..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h | |||
@@ -87,7 +87,7 @@ static inline int prrn_is_enabled(void) | |||
87 | #include <asm/smp.h> | 87 | #include <asm/smp.h> |
88 | 88 | ||
89 | #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) | 89 | #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) |
90 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) | 90 | #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) |
91 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) | 91 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) |
92 | #define topology_core_id(cpu) (cpu_to_core_id(cpu)) | 92 | #define topology_core_id(cpu) (cpu_to_core_id(cpu)) |
93 | #endif | 93 | #endif |
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 3cf529ceec5b..ac93a3bd2730 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c | |||
@@ -27,11 +27,11 @@ int enter_vmx_usercopy(void) | |||
27 | if (in_interrupt()) | 27 | if (in_interrupt()) |
28 | return 0; | 28 | return 0; |
29 | 29 | ||
30 | /* This acts as preempt_disable() as well and will make | 30 | preempt_disable(); |
31 | * enable_kernel_altivec(). We need to disable page faults | 31 | /* |
32 | * as they can call schedule and thus make us lose the VMX | 32 | * We need to disable page faults as they can call schedule and |
33 | * context. So on page faults, we just fail which will cause | 33 | * thus make us lose the VMX context. So on page faults, we just |
34 | * a fallback to the normal non-vmx copy. | 34 | * fail which will cause a fallback to the normal non-vmx copy. |
35 | */ | 35 | */ |
36 | pagefault_disable(); | 36 | pagefault_disable(); |
37 | 37 | ||
@@ -47,6 +47,7 @@ int enter_vmx_usercopy(void) | |||
47 | int exit_vmx_usercopy(void) | 47 | int exit_vmx_usercopy(void) |
48 | { | 48 | { |
49 | pagefault_enable(); | 49 | pagefault_enable(); |
50 | preempt_enable(); | ||
50 | return 0; | 51 | return 0; |
51 | } | 52 | } |
52 | 53 | ||
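enter_vmx_usercopy()/exit_vmx_usercopy() above now take preempt_disable() explicitly instead of relying on pagefault_disable() to imply it. The real callers are the powerpc VMX copy loops in assembly; the following is a purely illustrative C-shaped caller, where do_vmx_copy() is a hypothetical stand-in for that assembly:

#include <linux/uaccess.h>

int enter_vmx_usercopy(void);
int exit_vmx_usercopy(void);

/* Hypothetical VMX-accelerated inner copy; stands in for the asm loops. */
unsigned long do_vmx_copy(void *dst, const void __user *src, unsigned long n);

static unsigned long copy_with_vmx(void *dst, const void __user *src,
				   unsigned long n)
{
	unsigned long left;

	if (!enter_vmx_usercopy())		/* e.g. in_interrupt(): no VMX */
		return __copy_from_user(dst, src, n);

	/* Pagefaults are disabled here, so a fault just leaves bytes behind
	 * for the caller to retry with the ordinary non-VMX copy. */
	left = do_vmx_copy(dst, src, n);
	exit_vmx_usercopy();
	return left;
}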
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b396868d2aa7..6d535973b200 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c | |||
@@ -33,13 +33,13 @@ | |||
33 | #include <linux/ratelimit.h> | 33 | #include <linux/ratelimit.h> |
34 | #include <linux/context_tracking.h> | 34 | #include <linux/context_tracking.h> |
35 | #include <linux/hugetlb.h> | 35 | #include <linux/hugetlb.h> |
36 | #include <linux/uaccess.h> | ||
36 | 37 | ||
37 | #include <asm/firmware.h> | 38 | #include <asm/firmware.h> |
38 | #include <asm/page.h> | 39 | #include <asm/page.h> |
39 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
40 | #include <asm/mmu.h> | 41 | #include <asm/mmu.h> |
41 | #include <asm/mmu_context.h> | 42 | #include <asm/mmu_context.h> |
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/tlbflush.h> | 43 | #include <asm/tlbflush.h> |
44 | #include <asm/siginfo.h> | 44 | #include <asm/siginfo.h> |
45 | #include <asm/debug.h> | 45 | #include <asm/debug.h> |
@@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
272 | if (!arch_irq_disabled_regs(regs)) | 272 | if (!arch_irq_disabled_regs(regs)) |
273 | local_irq_enable(); | 273 | local_irq_enable(); |
274 | 274 | ||
275 | if (in_atomic() || mm == NULL) { | 275 | if (faulthandler_disabled() || mm == NULL) { |
276 | if (!user_mode(regs)) { | 276 | if (!user_mode(regs)) { |
277 | rc = SIGSEGV; | 277 | rc = SIGSEGV; |
278 | goto bail; | 278 | goto bail; |
279 | } | 279 | } |
280 | /* in_atomic() in user mode is really bad, | 280 | /* faulthandler_disabled() in user mode is really bad, |
281 | as is current->mm == NULL. */ | 281 | as is current->mm == NULL. */ |
282 | printk(KERN_EMERG "Page fault in user mode with " | 282 | printk(KERN_EMERG "Page fault in user mode with " |
283 | "in_atomic() = %d mm = %p\n", in_atomic(), mm); | 283 | "faulthandler_disabled() = %d mm = %p\n", |
284 | faulthandler_disabled(), mm); | ||
284 | printk(KERN_EMERG "NIP = %lx MSR = %lx\n", | 285 | printk(KERN_EMERG "NIP = %lx MSR = %lx\n", |
285 | regs->nip, regs->msr); | 286 | regs->nip, regs->msr); |
286 | die("Weird page fault", regs, SIGSEGV); | 287 | die("Weird page fault", regs, SIGSEGV); |
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index e7450bdbe83a..e292c8a60952 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c | |||
@@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |||
34 | unsigned long vaddr; | 34 | unsigned long vaddr; |
35 | int idx, type; | 35 | int idx, type; |
36 | 36 | ||
37 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 37 | preempt_disable(); |
38 | pagefault_disable(); | 38 | pagefault_disable(); |
39 | if (!PageHighMem(page)) | 39 | if (!PageHighMem(page)) |
40 | return page_address(page); | 40 | return page_address(page); |
@@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr) | |||
59 | 59 | ||
60 | if (vaddr < __fix_to_virt(FIX_KMAP_END)) { | 60 | if (vaddr < __fix_to_virt(FIX_KMAP_END)) { |
61 | pagefault_enable(); | 61 | pagefault_enable(); |
62 | preempt_enable(); | ||
62 | return; | 63 | return; |
63 | } | 64 | } |
64 | 65 | ||
@@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr) | |||
82 | 83 | ||
83 | kmap_atomic_idx_pop(); | 84 | kmap_atomic_idx_pop(); |
84 | pagefault_enable(); | 85 | pagefault_enable(); |
86 | preempt_enable(); | ||
85 | } | 87 | } |
86 | EXPORT_SYMBOL(__kunmap_atomic); | 88 | EXPORT_SYMBOL(__kunmap_atomic); |
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index cbd3d069897f..723a099f6be3 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock); | |||
217 | static int mm_is_core_local(struct mm_struct *mm) | 217 | static int mm_is_core_local(struct mm_struct *mm) |
218 | { | 218 | { |
219 | return cpumask_subset(mm_cpumask(mm), | 219 | return cpumask_subset(mm_cpumask(mm), |
220 | topology_thread_cpumask(smp_processor_id())); | 220 | topology_sibling_cpumask(smp_processor_id())); |
221 | } | 221 | } |
222 | 222 | ||
223 | struct tlb_flush_param { | 223 | struct tlb_flush_param { |
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index b1453a2ae1ca..4990f6c66288 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h | |||
@@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); | |||
22 | 22 | ||
23 | #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id) | 23 | #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id) |
24 | #define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id) | 24 | #define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id) |
25 | #define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask) | 25 | #define topology_sibling_cpumask(cpu) \ |
26 | (&per_cpu(cpu_topology, cpu).thread_mask) | ||
26 | #define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id) | 27 | #define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id) |
27 | #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) | 28 | #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) |
28 | #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) | 29 | #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) |
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d64a7a62164f..9dd4cc47ddc7 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h | |||
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) | |||
98 | * @from: Source address, in user space. | 98 | * @from: Source address, in user space. |
99 | * @n: Number of bytes to copy. | 99 | * @n: Number of bytes to copy. |
100 | * | 100 | * |
101 | * Context: User context only. This function may sleep. | 101 | * Context: User context only. This function may sleep if pagefaults are |
102 | * enabled. | ||
102 | * | 103 | * |
103 | * Copy data from user space to kernel space. Caller must check | 104 | * Copy data from user space to kernel space. Caller must check |
104 | * the specified block with access_ok() before calling this function. | 105 | * the specified block with access_ok() before calling this function. |
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from, | |||
118 | * @from: Source address, in kernel space. | 119 | * @from: Source address, in kernel space. |
119 | * @n: Number of bytes to copy. | 120 | * @n: Number of bytes to copy. |
120 | * | 121 | * |
121 | * Context: User context only. This function may sleep. | 122 | * Context: User context only. This function may sleep if pagefaults are |
123 | * enabled. | ||
122 | * | 124 | * |
123 | * Copy data from kernel space to user space. Caller must check | 125 | * Copy data from kernel space to user space. Caller must check |
124 | * the specified block with access_ok() before calling this function. | 126 | * the specified block with access_ok() before calling this function. |
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn)); | |||
264 | * @from: Source address, in kernel space. | 266 | * @from: Source address, in kernel space. |
265 | * @n: Number of bytes to copy. | 267 | * @n: Number of bytes to copy. |
266 | * | 268 | * |
267 | * Context: User context only. This function may sleep. | 269 | * Context: User context only. This function may sleep if pagefaults are |
270 | * enabled. | ||
268 | * | 271 | * |
269 | * Copy data from kernel space to user space. | 272 | * Copy data from kernel space to user space. |
270 | * | 273 | * |
@@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct") | |||
290 | * @from: Source address, in user space. | 293 | * @from: Source address, in user space. |
291 | * @n: Number of bytes to copy. | 294 | * @n: Number of bytes to copy. |
292 | * | 295 | * |
293 | * Context: User context only. This function may sleep. | 296 | * Context: User context only. This function may sleep if pagefaults are |
297 | * enabled. | ||
294 | * | 298 | * |
295 | * Copy data from user space to kernel space. | 299 | * Copy data from user space to kernel space. |
296 | * | 300 | * |
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n | |||
348 | * strlen_user: - Get the size of a string in user space. | 352 | * strlen_user: - Get the size of a string in user space. |
349 | * @str: The string to measure. | 353 | * @str: The string to measure. |
350 | * | 354 | * |
351 | * Context: User context only. This function may sleep. | 355 | * Context: User context only. This function may sleep if pagefaults are |
356 | * enabled. | ||
352 | * | 357 | * |
353 | * Get the size of a NUL-terminated string in user space. | 358 | * Get the size of a NUL-terminated string in user space. |
354 | * | 359 | * |
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 76515bcea2f1..4c8f5d7f9c23 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access) | |||
399 | * user context. | 399 | * user context. |
400 | */ | 400 | */ |
401 | fault = VM_FAULT_BADCONTEXT; | 401 | fault = VM_FAULT_BADCONTEXT; |
402 | if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) | 402 | if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) |
403 | goto out; | 403 | goto out; |
404 | 404 | ||
405 | address = trans_exc_code & __FAIL_ADDR_MASK; | 405 | address = trans_exc_code & __FAIL_ADDR_MASK; |
diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index ab66ddde777b..20a3591225cc 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h | |||
@@ -36,7 +36,8 @@ | |||
36 | * @addr: User space pointer to start of block to check | 36 | * @addr: User space pointer to start of block to check |
37 | * @size: Size of block to check | 37 | * @size: Size of block to check |
38 | * | 38 | * |
39 | * Context: User context only. This function may sleep. | 39 | * Context: User context only. This function may sleep if pagefaults are |
40 | * enabled. | ||
40 | * | 41 | * |
41 | * Checks if a pointer to a block of memory in user space is valid. | 42 | * Checks if a pointer to a block of memory in user space is valid. |
42 | * | 43 | * |
@@ -61,7 +62,8 @@ | |||
61 | * @x: Value to copy to user space. | 62 | * @x: Value to copy to user space. |
62 | * @ptr: Destination address, in user space. | 63 | * @ptr: Destination address, in user space. |
63 | * | 64 | * |
64 | * Context: User context only. This function may sleep. | 65 | * Context: User context only. This function may sleep if pagefaults are |
66 | * enabled. | ||
65 | * | 67 | * |
66 | * This macro copies a single simple value from kernel space to user | 68 | * This macro copies a single simple value from kernel space to user |
67 | * space. It supports simple types like char and int, but not larger | 69 | * space. It supports simple types like char and int, but not larger |
@@ -79,7 +81,8 @@ | |||
79 | * @x: Variable to store result. | 81 | * @x: Variable to store result. |
80 | * @ptr: Source address, in user space. | 82 | * @ptr: Source address, in user space. |
81 | * | 83 | * |
82 | * Context: User context only. This function may sleep. | 84 | * Context: User context only. This function may sleep if pagefaults are |
85 | * enabled. | ||
83 | * | 86 | * |
84 | * This macro copies a single simple variable from user space to kernel | 87 | * This macro copies a single simple variable from user space to kernel |
85 | * space. It supports simple types like char and int, but not larger | 88 | * space. It supports simple types like char and int, but not larger |
@@ -98,7 +101,8 @@ | |||
98 | * @x: Value to copy to user space. | 101 | * @x: Value to copy to user space. |
99 | * @ptr: Destination address, in user space. | 102 | * @ptr: Destination address, in user space. |
100 | * | 103 | * |
101 | * Context: User context only. This function may sleep. | 104 | * Context: User context only. This function may sleep if pagefaults are |
105 | * enabled. | ||
102 | * | 106 | * |
103 | * This macro copies a single simple value from kernel space to user | 107 | * This macro copies a single simple value from kernel space to user |
104 | * space. It supports simple types like char and int, but not larger | 108 | * space. It supports simple types like char and int, but not larger |
@@ -119,7 +123,8 @@ | |||
119 | * @x: Variable to store result. | 123 | * @x: Variable to store result. |
120 | * @ptr: Source address, in user space. | 124 | * @ptr: Source address, in user space. |
121 | * | 125 | * |
122 | * Context: User context only. This function may sleep. | 126 | * Context: User context only. This function may sleep if pagefaults are |
127 | * enabled. | ||
123 | * | 128 | * |
124 | * This macro copies a single simple variable from user space to kernel | 129 | * This macro copies a single simple variable from user space to kernel |
125 | * space. It supports simple types like char and int, but not larger | 130 | * space. It supports simple types like char and int, but not larger |
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6860beb2a280..37a6c2e0e969 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/string.h> | 34 | #include <linux/string.h> |
35 | #include <linux/types.h> | 35 | #include <linux/types.h> |
36 | #include <linux/ptrace.h> | 36 | #include <linux/ptrace.h> |
37 | #include <linux/uaccess.h> | ||
37 | 38 | ||
38 | /* | 39 | /* |
39 | * This routine handles page faults. It determines the address, | 40 | * This routine handles page faults. It determines the address, |
@@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, | |||
73 | * If we're in an interrupt or have no user | 74 | * If we're in an interrupt or have no user |
74 | * context, we must not take the fault.. | 75 | * context, we must not take the fault.. |
75 | */ | 76 | */ |
76 | if (in_atomic() || !mm) | 77 | if (pagefault_disabled() || !mm) |
77 | goto bad_area_nosemaphore; | 78 | goto bad_area_nosemaphore; |
78 | 79 | ||
79 | if (user_mode(regs)) | 80 | if (user_mode(regs)) |
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index a58fec9b55e0..79d8276377d1 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kprobes.h> | 17 | #include <linux/kprobes.h> |
18 | #include <linux/perf_event.h> | 18 | #include <linux/perf_event.h> |
19 | #include <linux/kdebug.h> | 19 | #include <linux/kdebug.h> |
20 | #include <linux/uaccess.h> | ||
20 | #include <asm/io_trapped.h> | 21 | #include <asm/io_trapped.h> |
21 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
22 | #include <asm/tlbflush.h> | 23 | #include <asm/tlbflush.h> |
@@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
438 | 439 | ||
439 | /* | 440 | /* |
440 | * If we're in an interrupt, have no user context or are running | 441 | * If we're in an interrupt, have no user context or are running |
441 | * in an atomic region then we must not take the fault: | 442 | * with pagefaults disabled then we must not take the fault: |
442 | */ | 443 | */ |
443 | if (unlikely(in_atomic() || !mm)) { | 444 | if (unlikely(faulthandler_disabled() || !mm)) { |
444 | bad_area_nosemaphore(regs, error_code, address); | 445 | bad_area_nosemaphore(regs, error_code, address); |
445 | return; | 446 | return; |
446 | } | 447 | } |
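Across the fault handlers above (and the ones that follow), the old in_atomic() test is replaced by the new helpers from <linux/uaccess.h>: pagefault_disabled() reflects an explicit pagefault_disable() section, while faulthandler_disabled() also covers hard atomic context. A hedged sketch of the prologue the per-arch hunks converge on (the wrapper function is illustrative):

#include <linux/uaccess.h>	/* faulthandler_disabled() */
#include <linux/mm_types.h>

/* Illustrative helper: may we take mmap_sem and handle this fault? */
static bool fault_may_sleep(struct mm_struct *mm)
{
	/* Interrupt context, a kernel thread without an mm, or an explicit
	 * pagefault_disable() section: fall back to the exception tables. */
	return !faulthandler_disabled() && mm;
}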
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index d1761df5cca6..01d17046225a 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h | |||
@@ -41,7 +41,7 @@ static inline int pcibus_to_node(struct pci_bus *pbus) | |||
41 | #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) | 41 | #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) |
42 | #define topology_core_id(cpu) (cpu_data(cpu).core_id) | 42 | #define topology_core_id(cpu) (cpu_data(cpu).core_id) |
43 | #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) | 43 | #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) |
44 | #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) | 44 | #define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) |
45 | #endif /* CONFIG_SMP */ | 45 | #endif /* CONFIG_SMP */ |
46 | 46 | ||
47 | extern cpumask_t cpu_core_map[NR_CPUS]; | 47 | extern cpumask_t cpu_core_map[NR_CPUS]; |
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 70d817154fe8..c399e7b3b035 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/perf_event.h> | 21 | #include <linux/perf_event.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/kdebug.h> | 23 | #include <linux/kdebug.h> |
24 | #include <linux/uaccess.h> | ||
24 | 25 | ||
25 | #include <asm/page.h> | 26 | #include <asm/page.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -29,7 +30,6 @@ | |||
29 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
30 | #include <asm/smp.h> | 31 | #include <asm/smp.h> |
31 | #include <asm/traps.h> | 32 | #include <asm/traps.h> |
32 | #include <asm/uaccess.h> | ||
33 | 33 | ||
34 | #include "mm_32.h" | 34 | #include "mm_32.h" |
35 | 35 | ||
@@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, | |||
196 | * If we're in an interrupt or have no user | 196 | * If we're in an interrupt or have no user |
197 | * context, we must not take the fault.. | 197 | * context, we must not take the fault.. |
198 | */ | 198 | */ |
199 | if (in_atomic() || !mm) | 199 | if (pagefault_disabled() || !mm) |
200 | goto no_context; | 200 | goto no_context; |
201 | 201 | ||
202 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | 202 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 479823249429..e9268ea1a68d 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c | |||
@@ -22,12 +22,12 @@ | |||
22 | #include <linux/kdebug.h> | 22 | #include <linux/kdebug.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <linux/context_tracking.h> | 24 | #include <linux/context_tracking.h> |
25 | #include <linux/uaccess.h> | ||
25 | 26 | ||
26 | #include <asm/page.h> | 27 | #include <asm/page.h> |
27 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable.h> |
28 | #include <asm/openprom.h> | 29 | #include <asm/openprom.h> |
29 | #include <asm/oplib.h> | 30 | #include <asm/oplib.h> |
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/asi.h> | 31 | #include <asm/asi.h> |
32 | #include <asm/lsu.h> | 32 | #include <asm/lsu.h> |
33 | #include <asm/sections.h> | 33 | #include <asm/sections.h> |
@@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) | |||
330 | * If we're in an interrupt or have no user | 330 | * If we're in an interrupt or have no user |
331 | * context, we must not take the fault.. | 331 | * context, we must not take the fault.. |
332 | */ | 332 | */ |
333 | if (in_atomic() || !mm) | 333 | if (faulthandler_disabled() || !mm) |
334 | goto intr_or_no_mm; | 334 | goto intr_or_no_mm; |
335 | 335 | ||
336 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | 336 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 449f864f0cef..a454ec5ff07a 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c | |||
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page) | |||
53 | unsigned long vaddr; | 53 | unsigned long vaddr; |
54 | long idx, type; | 54 | long idx, type; |
55 | 55 | ||
56 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 56 | preempt_disable(); |
57 | pagefault_disable(); | 57 | pagefault_disable(); |
58 | if (!PageHighMem(page)) | 58 | if (!PageHighMem(page)) |
59 | return page_address(page); | 59 | return page_address(page); |
@@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr) | |||
91 | 91 | ||
92 | if (vaddr < FIXADDR_START) { // FIXME | 92 | if (vaddr < FIXADDR_START) { // FIXME |
93 | pagefault_enable(); | 93 | pagefault_enable(); |
94 | preempt_enable(); | ||
94 | return; | 95 | return; |
95 | } | 96 | } |
96 | 97 | ||
@@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr) | |||
126 | 127 | ||
127 | kmap_atomic_idx_pop(); | 128 | kmap_atomic_idx_pop(); |
128 | pagefault_enable(); | 129 | pagefault_enable(); |
130 | preempt_enable(); | ||
129 | } | 131 | } |
130 | EXPORT_SYMBOL(__kunmap_atomic); | 132 | EXPORT_SYMBOL(__kunmap_atomic); |
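The highmem hunks here and below all follow the same shape: since pagefault_disable() no longer touches the preempt count in this series, each kmap_atomic() implementation pins the CPU explicitly, and the unmap path unwinds in reverse order. A sketch of the pairing (the helper names are hypothetical):

#include <linux/preempt.h>
#include <linux/uaccess.h>

static void my_atomic_map_begin(void)
{
	preempt_disable();	/* the fixmap slot is per-CPU */
	pagefault_disable();	/* a fault in this window must not sleep */
}

static void my_atomic_map_end(void)
{
	pagefault_enable();
	preempt_enable();	/* strict reverse order of _begin() */
}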
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 559cb744112c..c5d08b89a96c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -2738,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs) | |||
2738 | struct mm_struct *mm = current->mm; | 2738 | struct mm_struct *mm = current->mm; |
2739 | struct tsb_config *tp; | 2739 | struct tsb_config *tp; |
2740 | 2740 | ||
2741 | if (in_atomic() || !mm) { | 2741 | if (faulthandler_disabled() || !mm) { |
2742 | const struct exception_table_entry *entry; | 2742 | const struct exception_table_entry *entry; |
2743 | 2743 | ||
2744 | entry = search_exception_tables(regs->tpc); | 2744 | entry = search_exception_tables(regs->tpc); |
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 938311844233..76b0d0ebb244 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h | |||
@@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node) | |||
55 | #define topology_physical_package_id(cpu) ((void)(cpu), 0) | 55 | #define topology_physical_package_id(cpu) ((void)(cpu), 0) |
56 | #define topology_core_id(cpu) (cpu) | 56 | #define topology_core_id(cpu) (cpu) |
57 | #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) | 57 | #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) |
58 | #define topology_thread_cpumask(cpu) cpumask_of(cpu) | 58 | #define topology_sibling_cpumask(cpu) cpumask_of(cpu) |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | #endif /* _ASM_TILE_TOPOLOGY_H */ | 61 | #endif /* _ASM_TILE_TOPOLOGY_H */ |
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index f41cb53cf645..a33276bf5ca1 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h | |||
@@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size); | |||
78 | * @addr: User space pointer to start of block to check | 78 | * @addr: User space pointer to start of block to check |
79 | * @size: Size of block to check | 79 | * @size: Size of block to check |
80 | * | 80 | * |
81 | * Context: User context only. This function may sleep. | 81 | * Context: User context only. This function may sleep if pagefaults are |
82 | * enabled. | ||
82 | * | 83 | * |
83 | * Checks if a pointer to a block of memory in user space is valid. | 84 | * Checks if a pointer to a block of memory in user space is valid. |
84 | * | 85 | * |
@@ -192,7 +193,8 @@ extern int __get_user_bad(void) | |||
192 | * @x: Variable to store result. | 193 | * @x: Variable to store result. |
193 | * @ptr: Source address, in user space. | 194 | * @ptr: Source address, in user space. |
194 | * | 195 | * |
195 | * Context: User context only. This function may sleep. | 196 | * Context: User context only. This function may sleep if pagefaults are |
197 | * enabled. | ||
196 | * | 198 | * |
197 | * This macro copies a single simple variable from user space to kernel | 199 | * This macro copies a single simple variable from user space to kernel |
198 | * space. It supports simple types like char and int, but not larger | 200 | * space. It supports simple types like char and int, but not larger |
@@ -274,7 +276,8 @@ extern int __put_user_bad(void) | |||
274 | * @x: Value to copy to user space. | 276 | * @x: Value to copy to user space. |
275 | * @ptr: Destination address, in user space. | 277 | * @ptr: Destination address, in user space. |
276 | * | 278 | * |
277 | * Context: User context only. This function may sleep. | 279 | * Context: User context only. This function may sleep if pagefaults are |
280 | * enabled. | ||
278 | * | 281 | * |
279 | * This macro copies a single simple value from kernel space to user | 282 | * This macro copies a single simple value from kernel space to user |
280 | * space. It supports simple types like char and int, but not larger | 283 | * space. It supports simple types like char and int, but not larger |
@@ -330,7 +333,8 @@ extern int __put_user_bad(void) | |||
330 | * @from: Source address, in kernel space. | 333 | * @from: Source address, in kernel space. |
331 | * @n: Number of bytes to copy. | 334 | * @n: Number of bytes to copy. |
332 | * | 335 | * |
333 | * Context: User context only. This function may sleep. | 336 | * Context: User context only. This function may sleep if pagefaults are |
337 | * enabled. | ||
334 | * | 338 | * |
335 | * Copy data from kernel space to user space. Caller must check | 339 | * Copy data from kernel space to user space. Caller must check |
336 | * the specified block with access_ok() before calling this function. | 340 | * the specified block with access_ok() before calling this function. |
@@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n) | |||
366 | * @from: Source address, in user space. | 370 | * @from: Source address, in user space. |
367 | * @n: Number of bytes to copy. | 371 | * @n: Number of bytes to copy. |
368 | * | 372 | * |
369 | * Context: User context only. This function may sleep. | 373 | * Context: User context only. This function may sleep if pagefaults are |
374 | * enabled. | ||
370 | * | 375 | * |
371 | * Copy data from user space to kernel space. Caller must check | 376 | * Copy data from user space to kernel space. Caller must check |
372 | * the specified block with access_ok() before calling this function. | 377 | * the specified block with access_ok() before calling this function. |
@@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to, | |||
437 | * @from: Source address, in user space. | 442 | * @from: Source address, in user space. |
438 | * @n: Number of bytes to copy. | 443 | * @n: Number of bytes to copy. |
439 | * | 444 | * |
440 | * Context: User context only. This function may sleep. | 445 | * Context: User context only. This function may sleep if pagefaults are |
446 | * enabled. | ||
441 | * | 447 | * |
442 | * Copy data from user space to user space. Caller must check | 448 | * Copy data from user space to user space. Caller must check |
443 | * the specified blocks with access_ok() before calling this function. | 449 | * the specified blocks with access_ok() before calling this function. |
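The same documentation change applies to the bulk copy routines: they may sleep, so they are only legal where pagefaults are enabled. An illustrative caller (the struct and function names are made up for the example):

#include <linux/uaccess.h>
#include <linux/types.h>

struct my_req {
	u32 flags;
	u64 addr;
};

static long fetch_req(struct my_req __user *uptr, struct my_req *kreq)
{
	/* May sleep to fault the user page in; the caller must hold no
	 * spinlocks and must not have pagefaults disabled. */
	if (copy_from_user(kreq, uptr, sizeof(*kreq)))
		return -EFAULT;
	return 0;
}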
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index e83cc999da02..3f4f58d34a92 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs, | |||
354 | 354 | ||
355 | /* | 355 | /* |
356 | * If we're in an interrupt, have no user context or are running in an | 356 | * If we're in an interrupt, have no user context or are running in an |
357 | * atomic region then we must not take the fault. | 357 | * region with pagefaults disabled then we must not take the fault. |
358 | */ | 358 | */ |
359 | if (in_atomic() || !mm) { | 359 | if (pagefault_disabled() || !mm) { |
360 | vma = NULL; /* happy compiler */ | 360 | vma = NULL; /* happy compiler */ |
361 | goto bad_area_nosemaphore; | 361 | goto bad_area_nosemaphore; |
362 | } | 362 | } |
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 6aa2f2625447..fcd545014e79 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c | |||
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |||
201 | int idx, type; | 201 | int idx, type; |
202 | pte_t *pte; | 202 | pte_t *pte; |
203 | 203 | ||
204 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 204 | preempt_disable(); |
205 | pagefault_disable(); | 205 | pagefault_disable(); |
206 | 206 | ||
207 | /* Avoid icache flushes by disallowing atomic executable mappings. */ | 207 | /* Avoid icache flushes by disallowing atomic executable mappings. */ |
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr) | |||
259 | } | 259 | } |
260 | 260 | ||
261 | pagefault_enable(); | 261 | pagefault_enable(); |
262 | preempt_enable(); | ||
262 | } | 263 | } |
263 | EXPORT_SYMBOL(__kunmap_atomic); | 264 | EXPORT_SYMBOL(__kunmap_atomic); |
264 | 265 | ||
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 8e4daf44e980..47ff9b7f3e5d 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/hardirq.h> | 8 | #include <linux/hardirq.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/uaccess.h> | ||
10 | #include <asm/current.h> | 11 | #include <asm/current.h> |
11 | #include <asm/pgtable.h> | 12 | #include <asm/pgtable.h> |
12 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
@@ -35,10 +36,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, | |||
35 | *code_out = SEGV_MAPERR; | 36 | *code_out = SEGV_MAPERR; |
36 | 37 | ||
37 | /* | 38 | /* |
38 | * If the fault was during atomic operation, don't take the fault, just | 39 | * If the fault was with pagefaults disabled, don't take the fault, just |
39 | * fail. | 40 | * fail. |
40 | */ | 41 | */ |
41 | if (in_atomic()) | 42 | if (faulthandler_disabled()) |
42 | goto out_nosemaphore; | 43 | goto out_nosemaphore; |
43 | 44 | ||
44 | if (is_user) | 45 | if (is_user) |
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 0dc922dba915..afccef5529cc 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c | |||
@@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |||
218 | * If we're in an interrupt or have no user | 218 | * If we're in an interrupt or have no user |
219 | * context, we must not take the fault.. | 219 | * context, we must not take the fault.. |
220 | */ | 220 | */ |
221 | if (in_atomic() || !mm) | 221 | if (faulthandler_disabled() || !mm) |
222 | goto no_context; | 222 | goto no_context; |
223 | 223 | ||
224 | if (user_mode(regs)) | 224 | if (user_mode(regs)) |
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8f3271842533..dca71714f860 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h | |||
@@ -99,11 +99,9 @@ static __always_inline bool should_resched(void) | |||
99 | extern asmlinkage void ___preempt_schedule(void); | 99 | extern asmlinkage void ___preempt_schedule(void); |
100 | # define __preempt_schedule() asm ("call ___preempt_schedule") | 100 | # define __preempt_schedule() asm ("call ___preempt_schedule") |
101 | extern asmlinkage void preempt_schedule(void); | 101 | extern asmlinkage void preempt_schedule(void); |
102 | # ifdef CONFIG_CONTEXT_TRACKING | 102 | extern asmlinkage void ___preempt_schedule_notrace(void); |
103 | extern asmlinkage void ___preempt_schedule_context(void); | 103 | # define __preempt_schedule_notrace() asm ("call ___preempt_schedule_notrace") |
104 | # define __preempt_schedule_context() asm ("call ___preempt_schedule_context") | 104 | extern asmlinkage void preempt_schedule_notrace(void); |
105 | extern asmlinkage void preempt_schedule_context(void); | ||
106 | # endif | ||
107 | #endif | 105 | #endif |
108 | 106 | ||
109 | #endif /* __ASM_PREEMPT_H */ | 107 | #endif /* __ASM_PREEMPT_H */ |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 17a8dced12da..222a6a3ca2b5 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -37,16 +37,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); | |||
37 | DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); | 37 | DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); |
38 | DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); | 38 | DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); |
39 | 39 | ||
40 | static inline struct cpumask *cpu_sibling_mask(int cpu) | ||
41 | { | ||
42 | return per_cpu(cpu_sibling_map, cpu); | ||
43 | } | ||
44 | |||
45 | static inline struct cpumask *cpu_core_mask(int cpu) | ||
46 | { | ||
47 | return per_cpu(cpu_core_map, cpu); | ||
48 | } | ||
49 | |||
50 | static inline struct cpumask *cpu_llc_shared_mask(int cpu) | 40 | static inline struct cpumask *cpu_llc_shared_mask(int cpu) |
51 | { | 41 | { |
52 | return per_cpu(cpu_llc_shared_map, cpu); | 42 | return per_cpu(cpu_llc_shared_map, cpu); |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e8f04f2c26f..5a77593fdace 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -124,7 +124,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); | |||
124 | 124 | ||
125 | #ifdef ENABLE_TOPO_DEFINES | 125 | #ifdef ENABLE_TOPO_DEFINES |
126 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) | 126 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) |
127 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) | 127 | #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) |
128 | #endif | 128 | #endif |
129 | 129 | ||
130 | static inline void arch_fix_phys_package_id(int num, u32 slot) | 130 | static inline void arch_fix_phys_package_id(int num, u32 slot) |
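The topology_thread_cpumask() -> topology_sibling_cpumask() rename (and the removal of the x86-private cpu_sibling_mask()/cpu_core_mask() helpers) shows up in many callers below. A minimal usage sketch with the new names (the wrapper functions are illustrative):

#include <linux/topology.h>
#include <linux/cpumask.h>

/* Number of hardware threads sharing cpu's core. */
static unsigned int smt_width(int cpu)
{
	return cpumask_weight(topology_sibling_cpumask(cpu));
}

/* Canonical representative of cpu's SMT group. */
static unsigned int first_sibling(int cpu)
{
	return cpumask_first(topology_sibling_cpumask(cpu));
}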
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index ace9dec050b1..a8df874f3e88 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un | |||
74 | * @addr: User space pointer to start of block to check | 74 | * @addr: User space pointer to start of block to check |
75 | * @size: Size of block to check | 75 | * @size: Size of block to check |
76 | * | 76 | * |
77 | * Context: User context only. This function may sleep. | 77 | * Context: User context only. This function may sleep if pagefaults are |
78 | * enabled. | ||
78 | * | 79 | * |
79 | * Checks if a pointer to a block of memory in user space is valid. | 80 | * Checks if a pointer to a block of memory in user space is valid. |
80 | * | 81 | * |
@@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) | |||
145 | * @x: Variable to store result. | 146 | * @x: Variable to store result. |
146 | * @ptr: Source address, in user space. | 147 | * @ptr: Source address, in user space. |
147 | * | 148 | * |
148 | * Context: User context only. This function may sleep. | 149 | * Context: User context only. This function may sleep if pagefaults are |
150 | * enabled. | ||
149 | * | 151 | * |
150 | * This macro copies a single simple variable from user space to kernel | 152 | * This macro copies a single simple variable from user space to kernel |
151 | * space. It supports simple types like char and int, but not larger | 153 | * space. It supports simple types like char and int, but not larger |
@@ -240,7 +242,8 @@ extern void __put_user_8(void); | |||
240 | * @x: Value to copy to user space. | 242 | * @x: Value to copy to user space. |
241 | * @ptr: Destination address, in user space. | 243 | * @ptr: Destination address, in user space. |
242 | * | 244 | * |
243 | * Context: User context only. This function may sleep. | 245 | * Context: User context only. This function may sleep if pagefaults are |
246 | * enabled. | ||
244 | * | 247 | * |
245 | * This macro copies a single simple value from kernel space to user | 248 | * This macro copies a single simple value from kernel space to user |
246 | * space. It supports simple types like char and int, but not larger | 249 | * space. It supports simple types like char and int, but not larger |
@@ -455,7 +458,8 @@ struct __large_struct { unsigned long buf[100]; }; | |||
455 | * @x: Variable to store result. | 458 | * @x: Variable to store result. |
456 | * @ptr: Source address, in user space. | 459 | * @ptr: Source address, in user space. |
457 | * | 460 | * |
458 | * Context: User context only. This function may sleep. | 461 | * Context: User context only. This function may sleep if pagefaults are |
462 | * enabled. | ||
459 | * | 463 | * |
460 | * This macro copies a single simple variable from user space to kernel | 464 | * This macro copies a single simple variable from user space to kernel |
461 | * space. It supports simple types like char and int, but not larger | 465 | * space. It supports simple types like char and int, but not larger |
@@ -479,7 +483,8 @@ struct __large_struct { unsigned long buf[100]; }; | |||
479 | * @x: Value to copy to user space. | 483 | * @x: Value to copy to user space. |
480 | * @ptr: Destination address, in user space. | 484 | * @ptr: Destination address, in user space. |
481 | * | 485 | * |
482 | * Context: User context only. This function may sleep. | 486 | * Context: User context only. This function may sleep if pagefaults are |
487 | * enabled. | ||
483 | * | 488 | * |
484 | * This macro copies a single simple value from kernel space to user | 489 | * This macro copies a single simple value from kernel space to user |
485 | * space. It supports simple types like char and int, but not larger | 490 | * space. It supports simple types like char and int, but not larger |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..7c8ad3451988 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) | |||
70 | * @from: Source address, in kernel space. | 70 | * @from: Source address, in kernel space. |
71 | * @n: Number of bytes to copy. | 71 | * @n: Number of bytes to copy. |
72 | * | 72 | * |
73 | * Context: User context only. This function may sleep. | 73 | * Context: User context only. This function may sleep if pagefaults are |
74 | * enabled. | ||
74 | * | 75 | * |
75 | * Copy data from kernel space to user space. Caller must check | 76 | * Copy data from kernel space to user space. Caller must check |
76 | * the specified block with access_ok() before calling this function. | 77 | * the specified block with access_ok() before calling this function. |
@@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) | |||
117 | * @from: Source address, in user space. | 118 | * @from: Source address, in user space. |
118 | * @n: Number of bytes to copy. | 119 | * @n: Number of bytes to copy. |
119 | * | 120 | * |
120 | * Context: User context only. This function may sleep. | 121 | * Context: User context only. This function may sleep if pagefaults are |
122 | * enabled. | ||
121 | * | 123 | * |
122 | * Copy data from user space to kernel space. Caller must check | 124 | * Copy data from user space to kernel space. Caller must check |
123 | * the specified block with access_ok() before calling this function. | 125 | * the specified block with access_ok() before calling this function. |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 19980d9a6cc9..b9826a981fb2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -2576,7 +2576,7 @@ static void intel_pmu_cpu_starting(int cpu) | |||
2576 | if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { | 2576 | if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { |
2577 | void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; | 2577 | void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; |
2578 | 2578 | ||
2579 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 2579 | for_each_cpu(i, topology_sibling_cpumask(cpu)) { |
2580 | struct intel_shared_regs *pc; | 2580 | struct intel_shared_regs *pc; |
2581 | 2581 | ||
2582 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 2582 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
@@ -2594,7 +2594,7 @@ static void intel_pmu_cpu_starting(int cpu) | |||
2594 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; | 2594 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
2595 | 2595 | ||
2596 | if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { | 2596 | if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { |
2597 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 2597 | for_each_cpu(i, topology_sibling_cpumask(cpu)) { |
2598 | struct intel_excl_cntrs *c; | 2598 | struct intel_excl_cntrs *c; |
2599 | 2599 | ||
2600 | c = per_cpu(cpu_hw_events, i).excl_cntrs; | 2600 | c = per_cpu(cpu_hw_events, i).excl_cntrs; |
@@ -3362,7 +3362,7 @@ static __init int fixup_ht_bug(void) | |||
3362 | if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) | 3362 | if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) |
3363 | return 0; | 3363 | return 0; |
3364 | 3364 | ||
3365 | w = cpumask_weight(topology_thread_cpumask(cpu)); | 3365 | w = cpumask_weight(topology_sibling_cpumask(cpu)); |
3366 | if (w > 1) { | 3366 | if (w > 1) { |
3367 | pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); | 3367 | pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); |
3368 | return 0; | 3368 | return 0; |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7d8c7608471..18ca99f2798b 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -12,7 +12,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | |||
12 | { | 12 | { |
13 | #ifdef CONFIG_SMP | 13 | #ifdef CONFIG_SMP |
14 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | 14 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); |
15 | seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu))); | 15 | seq_printf(m, "siblings\t: %d\n", |
16 | cpumask_weight(topology_core_cpumask(cpu))); | ||
16 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | 17 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); |
17 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | 18 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
18 | seq_printf(m, "apicid\t\t: %d\n", c->apicid); | 19 | seq_printf(m, "apicid\t\t: %d\n", c->apicid); |
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 05fd74f537d6..64341aa485ae 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -40,7 +40,5 @@ EXPORT_SYMBOL(empty_zero_page); | |||
40 | 40 | ||
41 | #ifdef CONFIG_PREEMPT | 41 | #ifdef CONFIG_PREEMPT |
42 | EXPORT_SYMBOL(___preempt_schedule); | 42 | EXPORT_SYMBOL(___preempt_schedule); |
43 | #ifdef CONFIG_CONTEXT_TRACKING | 43 | EXPORT_SYMBOL(___preempt_schedule_notrace); |
44 | EXPORT_SYMBOL(___preempt_schedule_context); | ||
45 | #endif | ||
46 | #endif | 44 | #endif |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc0..c648139d68d7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -445,11 +445,10 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) | |||
445 | } | 445 | } |
446 | 446 | ||
447 | /* | 447 | /* |
448 | * MONITOR/MWAIT with no hints, used for default default C1 state. | 448 | * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT |
449 | * This invokes MWAIT with interrutps enabled and no flags, | 449 | * with interrupts enabled and no flags, which is backwards compatible with the |
450 | * which is backwards compatible with the original MWAIT implementation. | 450 | * original MWAIT implementation. |
451 | */ | 451 | */ |
452 | |||
453 | static void mwait_idle(void) | 452 | static void mwait_idle(void) |
454 | { | 453 | { |
455 | if (!current_set_polling_and_test()) { | 454 | if (!current_set_polling_and_test()) { |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..0e8209619455 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -314,10 +314,10 @@ topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) | |||
314 | cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); | 314 | cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); |
315 | } | 315 | } |
316 | 316 | ||
317 | #define link_mask(_m, c1, c2) \ | 317 | #define link_mask(mfunc, c1, c2) \ |
318 | do { \ | 318 | do { \ |
319 | cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \ | 319 | cpumask_set_cpu((c1), mfunc(c2)); \ |
320 | cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \ | 320 | cpumask_set_cpu((c2), mfunc(c1)); \ |
321 | } while (0) | 321 | } while (0) |
322 | 322 | ||
323 | static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | 323 | static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) |
@@ -398,9 +398,9 @@ void set_cpu_sibling_map(int cpu) | |||
398 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); | 398 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); |
399 | 399 | ||
400 | if (!has_mp) { | 400 | if (!has_mp) { |
401 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); | 401 | cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); |
402 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); | 402 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); |
403 | cpumask_set_cpu(cpu, cpu_core_mask(cpu)); | 403 | cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); |
404 | c->booted_cores = 1; | 404 | c->booted_cores = 1; |
405 | return; | 405 | return; |
406 | } | 406 | } |
@@ -409,32 +409,34 @@ void set_cpu_sibling_map(int cpu) | |||
409 | o = &cpu_data(i); | 409 | o = &cpu_data(i); |
410 | 410 | ||
411 | if ((i == cpu) || (has_smt && match_smt(c, o))) | 411 | if ((i == cpu) || (has_smt && match_smt(c, o))) |
412 | link_mask(sibling, cpu, i); | 412 | link_mask(topology_sibling_cpumask, cpu, i); |
413 | 413 | ||
414 | if ((i == cpu) || (has_mp && match_llc(c, o))) | 414 | if ((i == cpu) || (has_mp && match_llc(c, o))) |
415 | link_mask(llc_shared, cpu, i); | 415 | link_mask(cpu_llc_shared_mask, cpu, i); |
416 | 416 | ||
417 | } | 417 | } |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * This needs a separate iteration over the cpus because we rely on all | 420 | * This needs a separate iteration over the cpus because we rely on all |
421 | * cpu_sibling_mask links to be set-up. | 421 | * topology_sibling_cpumask links to be set-up. |
422 | */ | 422 | */ |
423 | for_each_cpu(i, cpu_sibling_setup_mask) { | 423 | for_each_cpu(i, cpu_sibling_setup_mask) { |
424 | o = &cpu_data(i); | 424 | o = &cpu_data(i); |
425 | 425 | ||
426 | if ((i == cpu) || (has_mp && match_die(c, o))) { | 426 | if ((i == cpu) || (has_mp && match_die(c, o))) { |
427 | link_mask(core, cpu, i); | 427 | link_mask(topology_core_cpumask, cpu, i); |
428 | 428 | ||
429 | /* | 429 | /* |
430 | * Does this new cpu bringup a new core? | 430 | * Does this new cpu bringup a new core? |
431 | */ | 431 | */ |
432 | if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) { | 432 | if (cpumask_weight( |
433 | topology_sibling_cpumask(cpu)) == 1) { | ||
433 | /* | 434 | /* |
434 | * for each core in package, increment | 435 | * for each core in package, increment |
435 | * the booted_cores for this new cpu | 436 | * the booted_cores for this new cpu |
436 | */ | 437 | */ |
437 | if (cpumask_first(cpu_sibling_mask(i)) == i) | 438 | if (cpumask_first( |
439 | topology_sibling_cpumask(i)) == i) | ||
438 | c->booted_cores++; | 440 | c->booted_cores++; |
439 | /* | 441 | /* |
440 | * increment the core count for all | 442 | * increment the core count for all |
@@ -1009,8 +1011,8 @@ static __init void disable_smp(void) | |||
1009 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 1011 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
1010 | else | 1012 | else |
1011 | physid_set_mask_of_physid(0, &phys_cpu_present_map); | 1013 | physid_set_mask_of_physid(0, &phys_cpu_present_map); |
1012 | cpumask_set_cpu(0, cpu_sibling_mask(0)); | 1014 | cpumask_set_cpu(0, topology_sibling_cpumask(0)); |
1013 | cpumask_set_cpu(0, cpu_core_mask(0)); | 1015 | cpumask_set_cpu(0, topology_core_cpumask(0)); |
1014 | } | 1016 | } |
1015 | 1017 | ||
1016 | enum { | 1018 | enum { |
@@ -1293,22 +1295,22 @@ static void remove_siblinginfo(int cpu) | |||
1293 | int sibling; | 1295 | int sibling; |
1294 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 1296 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
1295 | 1297 | ||
1296 | for_each_cpu(sibling, cpu_core_mask(cpu)) { | 1298 | for_each_cpu(sibling, topology_core_cpumask(cpu)) { |
1297 | cpumask_clear_cpu(cpu, cpu_core_mask(sibling)); | 1299 | cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); |
1298 | /*/ | 1300 | /*/ |
1299 | * last thread sibling in this cpu core going down | 1301 | * last thread sibling in this cpu core going down |
1300 | */ | 1302 | */ |
1301 | if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) | 1303 | if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1) |
1302 | cpu_data(sibling).booted_cores--; | 1304 | cpu_data(sibling).booted_cores--; |
1303 | } | 1305 | } |
1304 | 1306 | ||
1305 | for_each_cpu(sibling, cpu_sibling_mask(cpu)) | 1307 | for_each_cpu(sibling, topology_sibling_cpumask(cpu)) |
1306 | cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); | 1308 | cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); |
1307 | for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) | 1309 | for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) |
1308 | cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); | 1310 | cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); |
1309 | cpumask_clear(cpu_llc_shared_mask(cpu)); | 1311 | cpumask_clear(cpu_llc_shared_mask(cpu)); |
1310 | cpumask_clear(cpu_sibling_mask(cpu)); | 1312 | cpumask_clear(topology_sibling_cpumask(cpu)); |
1311 | cpumask_clear(cpu_core_mask(cpu)); | 1313 | cpumask_clear(topology_core_cpumask(cpu)); |
1312 | c->phys_proc_id = 0; | 1314 | c->phys_proc_id = 0; |
1313 | c->cpu_core_id = 0; | 1315 | c->cpu_core_id = 0; |
1314 | cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); | 1316 | cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); |
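The link_mask() rework above is mechanical but worth spelling out: the macro now takes the cpumask accessor itself rather than a name fragment to be token-pasted into cpu_##_m##_mask(), which is what lets it work with the generic topology_*_cpumask() macros. An expansion sketch:

#define link_mask(mfunc, c1, c2)			\
do {							\
	cpumask_set_cpu((c1), mfunc(c2));		\
	cpumask_set_cpu((c2), mfunc(c1));		\
} while (0)

/* link_mask(topology_sibling_cpumask, cpu, i) now expands to:
 *	cpumask_set_cpu(cpu, topology_sibling_cpumask(i));
 *	cpumask_set_cpu(i, topology_sibling_cpumask(cpu));
 */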
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 26488487bc61..dd8d0791dfb5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -113,7 +113,7 @@ static void check_tsc_warp(unsigned int timeout) | |||
113 | */ | 113 | */ |
114 | static inline unsigned int loop_timeout(int cpu) | 114 | static inline unsigned int loop_timeout(int cpu) |
115 | { | 115 | { |
116 | return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20; | 116 | return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20; |
117 | } | 117 | } |
118 | 118 | ||
119 | /* | 119 | /* |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 37d8fa4438f0..a0695be19864 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -75,7 +75,5 @@ EXPORT_SYMBOL(native_load_gs_index); | |||
75 | 75 | ||
76 | #ifdef CONFIG_PREEMPT | 76 | #ifdef CONFIG_PREEMPT |
77 | EXPORT_SYMBOL(___preempt_schedule); | 77 | EXPORT_SYMBOL(___preempt_schedule); |
78 | #ifdef CONFIG_CONTEXT_TRACKING | 78 | EXPORT_SYMBOL(___preempt_schedule_notrace); |
79 | EXPORT_SYMBOL(___preempt_schedule_context); | ||
80 | #endif | ||
81 | #endif | 79 | #endif |
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 5eb715087b80..e407941d0488 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S | |||
@@ -38,8 +38,6 @@ | |||
38 | 38 | ||
39 | #ifdef CONFIG_PREEMPT | 39 | #ifdef CONFIG_PREEMPT |
40 | THUNK ___preempt_schedule, preempt_schedule | 40 | THUNK ___preempt_schedule, preempt_schedule |
41 | #ifdef CONFIG_CONTEXT_TRACKING | 41 | THUNK ___preempt_schedule_notrace, preempt_schedule_notrace |
42 | THUNK ___preempt_schedule_context, preempt_schedule_context | ||
43 | #endif | ||
44 | #endif | 42 | #endif |
45 | 43 | ||
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index f89ba4e93025..2198902329b5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -49,9 +49,7 @@ | |||
49 | 49 | ||
50 | #ifdef CONFIG_PREEMPT | 50 | #ifdef CONFIG_PREEMPT |
51 | THUNK ___preempt_schedule, preempt_schedule | 51 | THUNK ___preempt_schedule, preempt_schedule |
52 | #ifdef CONFIG_CONTEXT_TRACKING | 52 | THUNK ___preempt_schedule_notrace, preempt_schedule_notrace |
53 | THUNK ___preempt_schedule_context, preempt_schedule_context | ||
54 | #endif | ||
55 | #endif | 53 | #endif |
56 | 54 | ||
57 | #if defined(CONFIG_TRACE_IRQFLAGS) \ | 55 | #if defined(CONFIG_TRACE_IRQFLAGS) \ |
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index e2f5e21c03b3..91d93b95bd86 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); | |||
647 | * @from: Source address, in kernel space. | 647 | * @from: Source address, in kernel space. |
648 | * @n: Number of bytes to copy. | 648 | * @n: Number of bytes to copy. |
649 | * | 649 | * |
650 | * Context: User context only. This function may sleep. | 650 | * Context: User context only. This function may sleep if pagefaults are |
651 | * enabled. | ||
651 | * | 652 | * |
652 | * Copy data from kernel space to user space. | 653 | * Copy data from kernel space to user space. |
653 | * | 654 | * |
@@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user); | |||
668 | * @from: Source address, in user space. | 669 | * @from: Source address, in user space. |
669 | * @n: Number of bytes to copy. | 670 | * @n: Number of bytes to copy. |
670 | * | 671 | * |
671 | * Context: User context only. This function may sleep. | 672 | * Context: User context only. This function may sleep if pagefaults are |
673 | * enabled. | ||
672 | * | 674 | * |
673 | * Copy data from user space to kernel space. | 675 | * Copy data from user space to kernel space. |
674 | * | 676 | * |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 181c53bac3a7..9dc909841739 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ | 13 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ |
14 | #include <linux/prefetch.h> /* prefetchw */ | 14 | #include <linux/prefetch.h> /* prefetchw */ |
15 | #include <linux/context_tracking.h> /* exception_enter(), ... */ | 15 | #include <linux/context_tracking.h> /* exception_enter(), ... */ |
16 | #include <linux/uaccess.h> /* faulthandler_disabled() */ | ||
16 | 17 | ||
17 | #include <asm/traps.h> /* dotraplinkage, ... */ | 18 | #include <asm/traps.h> /* dotraplinkage, ... */ |
18 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 19 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
@@ -1126,9 +1127,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1126 | 1127 | ||
1127 | /* | 1128 | /* |
1128 | * If we're in an interrupt, have no user context or are running | 1129 | * If we're in an interrupt, have no user context or are running |
1129 | * in an atomic region then we must not take the fault: | 1130 | * in a region with pagefaults disabled then we must not take the fault |
1130 | */ | 1131 | */ |
1131 | if (unlikely(in_atomic() || !mm)) { | 1132 | if (unlikely(faulthandler_disabled() || !mm)) { |
1132 | bad_area_nosemaphore(regs, error_code, address); | 1133 | bad_area_nosemaphore(regs, error_code, address); |
1133 | return; | 1134 | return; |
1134 | } | 1135 | } |
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 4500142bc4aa..eecb207a2037 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |||
35 | unsigned long vaddr; | 35 | unsigned long vaddr; |
36 | int idx, type; | 36 | int idx, type; |
37 | 37 | ||
38 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | 38 | preempt_disable(); |
39 | pagefault_disable(); | 39 | pagefault_disable(); |
40 | 40 | ||
41 | if (!PageHighMem(page)) | 41 | if (!PageHighMem(page)) |
@@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr) | |||
100 | #endif | 100 | #endif |
101 | 101 | ||
102 | pagefault_enable(); | 102 | pagefault_enable(); |
103 | preempt_enable(); | ||
103 | } | 104 | } |
104 | EXPORT_SYMBOL(__kunmap_atomic); | 105 | EXPORT_SYMBOL(__kunmap_atomic); |
105 | 106 | ||
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..2b7ece0e103a 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |||
59 | unsigned long vaddr; | 59 | unsigned long vaddr; |
60 | int idx, type; | 60 | int idx, type; |
61 | 61 | ||
62 | preempt_disable(); | ||
62 | pagefault_disable(); | 63 | pagefault_disable(); |
63 | 64 | ||
64 | type = kmap_atomic_idx_push(); | 65 | type = kmap_atomic_idx_push(); |
@@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr) | |||
117 | } | 118 | } |
118 | 119 | ||
119 | pagefault_enable(); | 120 | pagefault_enable(); |
121 | preempt_enable(); | ||
120 | } | 122 | } |
121 | EXPORT_SYMBOL_GPL(iounmap_atomic); | 123 | EXPORT_SYMBOL_GPL(iounmap_atomic); |
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 9e3571a6535c..83a44a33cfa1 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c | |||
@@ -15,10 +15,10 @@ | |||
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
18 | #include <linux/uaccess.h> | ||
18 | #include <asm/mmu_context.h> | 19 | #include <asm/mmu_context.h> |
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/hardirq.h> | 21 | #include <asm/hardirq.h> |
21 | #include <asm/uaccess.h> | ||
22 | #include <asm/pgalloc.h> | 22 | #include <asm/pgalloc.h> |
23 | 23 | ||
24 | DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; | 24 | DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; |
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs) | |||
57 | /* If we're in an interrupt or have no user | 57 | /* If we're in an interrupt or have no user |
58 | * context, we must not take the fault.. | 58 | * context, we must not take the fault.. |
59 | */ | 59 | */ |
60 | if (in_atomic() || !mm) { | 60 | if (faulthandler_disabled() || !mm) { |
61 | bad_page_fault(regs, address, SIGSEGV); | 61 | bad_page_fault(regs, address, SIGSEGV); |
62 | return; | 62 | return; |
63 | } | 63 | } |
diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c index 8cfb71ec0937..184ceadccc1a 100644 --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c | |||
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) | |||
42 | enum fixed_addresses idx; | 42 | enum fixed_addresses idx; |
43 | unsigned long vaddr; | 43 | unsigned long vaddr; |
44 | 44 | ||
45 | preempt_disable(); | ||
45 | pagefault_disable(); | 46 | pagefault_disable(); |
46 | if (!PageHighMem(page)) | 47 | if (!PageHighMem(page)) |
47 | return page_address(page); | 48 | return page_address(page); |
@@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr) | |||
79 | } | 80 | } |
80 | 81 | ||
81 | pagefault_enable(); | 82 | pagefault_enable(); |
83 | preempt_enable(); | ||
82 | } | 84 | } |
83 | EXPORT_SYMBOL(__kunmap_atomic); | 85 | EXPORT_SYMBOL(__kunmap_atomic); |
84 | 86 | ||
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 5f13f4d0bcce..1e28ddb656b8 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c | |||
@@ -24,7 +24,7 @@ static int get_first_sibling(unsigned int cpu) | |||
24 | { | 24 | { |
25 | unsigned int ret; | 25 | unsigned int ret; |
26 | 26 | ||
27 | ret = cpumask_first(topology_thread_cpumask(cpu)); | 27 | ret = cpumask_first(topology_sibling_cpumask(cpu)); |
28 | if (ret < nr_cpu_ids) | 28 | if (ret < nr_cpu_ids) |
29 | return ret; | 29 | return ret; |
30 | 30 | ||
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6bc9cbc01ad6..00b39802d7ec 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c | |||
@@ -105,7 +105,7 @@ static void round_robin_cpu(unsigned int tsk_index) | |||
105 | mutex_lock(&round_robin_lock); | 105 | mutex_lock(&round_robin_lock); |
106 | cpumask_clear(tmp); | 106 | cpumask_clear(tmp); |
107 | for_each_cpu(cpu, pad_busy_cpus) | 107 | for_each_cpu(cpu, pad_busy_cpus) |
108 | cpumask_or(tmp, tmp, topology_thread_cpumask(cpu)); | 108 | cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu)); |
109 | cpumask_andnot(tmp, cpu_online_mask, tmp); | 109 | cpumask_andnot(tmp, cpu_online_mask, tmp); |
110 | /* avoid HT sibilings if possible */ | 110 | /* avoid HT sibilings if possible */ |
111 | if (cpumask_empty(tmp)) | 111 | if (cpumask_empty(tmp)) |
diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 6491f45200a7..8b7d7f8e5851 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c | |||
@@ -61,7 +61,7 @@ static DEVICE_ATTR_RO(physical_package_id); | |||
61 | define_id_show_func(core_id); | 61 | define_id_show_func(core_id); |
62 | static DEVICE_ATTR_RO(core_id); | 62 | static DEVICE_ATTR_RO(core_id); |
63 | 63 | ||
64 | define_siblings_show_func(thread_siblings, thread_cpumask); | 64 | define_siblings_show_func(thread_siblings, sibling_cpumask); |
65 | static DEVICE_ATTR_RO(thread_siblings); | 65 | static DEVICE_ATTR_RO(thread_siblings); |
66 | static DEVICE_ATTR_RO(thread_siblings_list); | 66 | static DEVICE_ATTR_RO(thread_siblings_list); |
67 | 67 | ||
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index b0c18ed8d83f..0136dfcdabf0 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c | |||
@@ -699,13 +699,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
699 | dmi_check_system(sw_any_bug_dmi_table); | 699 | dmi_check_system(sw_any_bug_dmi_table); |
700 | if (bios_with_sw_any_bug && !policy_is_shared(policy)) { | 700 | if (bios_with_sw_any_bug && !policy_is_shared(policy)) { |
701 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; | 701 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; |
702 | cpumask_copy(policy->cpus, cpu_core_mask(cpu)); | 702 | cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); |
703 | } | 703 | } |
704 | 704 | ||
705 | if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { | 705 | if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { |
706 | cpumask_clear(policy->cpus); | 706 | cpumask_clear(policy->cpus); |
707 | cpumask_set_cpu(cpu, policy->cpus); | 707 | cpumask_set_cpu(cpu, policy->cpus); |
708 | cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu)); | 708 | cpumask_copy(data->freqdomain_cpus, |
709 | topology_sibling_cpumask(cpu)); | ||
709 | policy->shared_type = CPUFREQ_SHARED_TYPE_HW; | 710 | policy->shared_type = CPUFREQ_SHARED_TYPE_HW; |
710 | pr_info_once(PFX "overriding BIOS provided _PSD data\n"); | 711 | pr_info_once(PFX "overriding BIOS provided _PSD data\n"); |
711 | } | 712 | } |
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index 529cfd92158f..5dd95dab580d 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c | |||
@@ -172,7 +172,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | |||
172 | unsigned int i; | 172 | unsigned int i; |
173 | 173 | ||
174 | #ifdef CONFIG_SMP | 174 | #ifdef CONFIG_SMP |
175 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | 175 | cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu)); |
176 | #endif | 176 | #endif |
177 | 177 | ||
178 | /* Errata workaround */ | 178 | /* Errata workaround */ |
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index f9ce7e4bf0fe..5c035d04d827 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c | |||
@@ -57,13 +57,6 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
57 | 57 | ||
58 | static struct cpufreq_driver cpufreq_amd64_driver; | 58 | static struct cpufreq_driver cpufreq_amd64_driver; |
59 | 59 | ||
60 | #ifndef CONFIG_SMP | ||
61 | static inline const struct cpumask *cpu_core_mask(int cpu) | ||
62 | { | ||
63 | return cpumask_of(0); | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | /* Return a frequency in MHz, given an input fid */ | 60 | /* Return a frequency in MHz, given an input fid */ |
68 | static u32 find_freq_from_fid(u32 fid) | 61 | static u32 find_freq_from_fid(u32 fid) |
69 | { | 62 | { |
@@ -620,7 +613,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, | |||
620 | 613 | ||
621 | pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | 614 | pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); |
622 | data->powernow_table = powernow_table; | 615 | data->powernow_table = powernow_table; |
623 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 616 | if (cpumask_first(topology_core_cpumask(data->cpu)) == data->cpu) |
624 | print_basics(data); | 617 | print_basics(data); |
625 | 618 | ||
626 | for (j = 0; j < data->numps; j++) | 619 | for (j = 0; j < data->numps; j++) |
@@ -784,7 +777,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
784 | CPUFREQ_TABLE_END; | 777 | CPUFREQ_TABLE_END; |
785 | data->powernow_table = powernow_table; | 778 | data->powernow_table = powernow_table; |
786 | 779 | ||
787 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 780 | if (cpumask_first(topology_core_cpumask(data->cpu)) == data->cpu) |
788 | print_basics(data); | 781 | print_basics(data); |
789 | 782 | ||
790 | /* notify BIOS that we exist */ | 783 | /* notify BIOS that we exist */ |
@@ -1090,7 +1083,7 @@ static int powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1090 | if (rc != 0) | 1083 | if (rc != 0) |
1091 | goto err_out_exit_acpi; | 1084 | goto err_out_exit_acpi; |
1092 | 1085 | ||
1093 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); | 1086 | cpumask_copy(pol->cpus, topology_core_cpumask(pol->cpu)); |
1094 | data->available_cores = pol->cpus; | 1087 | data->available_cores = pol->cpus; |
1095 | 1088 | ||
1096 | /* min/max the cpu is capable of */ | 1089 | /* min/max the cpu is capable of */ |
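The driver's private !CONFIG_SMP cpu_core_mask() stub can go away because the generic topology_core_cpumask() is usable on UP builds as well (an assumption drawn from the deletion above); the "first CPU of the core" test then reads the same everywhere. A small illustrative helper:

#include <linux/topology.h>
#include <linux/cpumask.h>

/* Illustrative only: true for exactly one CPU per core. */
static bool is_first_cpu_of_core(int cpu)
{
	return cpumask_first(topology_core_cpumask(cpu)) == cpu;
}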
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index e56d632a8b21..37555c6b86a7 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c | |||
@@ -292,7 +292,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
292 | 292 | ||
293 | /* only run on CPU to be set, or on its sibling */ | 293 | /* only run on CPU to be set, or on its sibling */ |
294 | #ifdef CONFIG_SMP | 294 | #ifdef CONFIG_SMP |
295 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | 295 | cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu)); |
296 | #endif | 296 | #endif |
297 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | 297 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); |
298 | 298 | ||
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index ab300ea19434..a9064e36e7b5 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c | |||
@@ -78,12 +78,14 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
78 | int ret; | 78 | int ret; |
79 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | 79 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); |
80 | 80 | ||
81 | preempt_disable(); | ||
81 | pagefault_disable(); | 82 | pagefault_disable(); |
82 | enable_kernel_altivec(); | 83 | enable_kernel_altivec(); |
83 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); | 84 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); |
84 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); | 85 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); |
85 | pagefault_enable(); | 86 | pagefault_enable(); |
86 | 87 | preempt_enable(); | |
88 | |||
87 | ret += crypto_cipher_setkey(ctx->fallback, key, keylen); | 89 | ret += crypto_cipher_setkey(ctx->fallback, key, keylen); |
88 | return ret; | 90 | return ret; |
89 | } | 91 | } |
@@ -95,10 +97,12 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
95 | if (in_interrupt()) { | 97 | if (in_interrupt()) { |
96 | crypto_cipher_encrypt_one(ctx->fallback, dst, src); | 98 | crypto_cipher_encrypt_one(ctx->fallback, dst, src); |
97 | } else { | 99 | } else { |
100 | preempt_disable(); | ||
98 | pagefault_disable(); | 101 | pagefault_disable(); |
99 | enable_kernel_altivec(); | 102 | enable_kernel_altivec(); |
100 | aes_p8_encrypt(src, dst, &ctx->enc_key); | 103 | aes_p8_encrypt(src, dst, &ctx->enc_key); |
101 | pagefault_enable(); | 104 | pagefault_enable(); |
105 | preempt_enable(); | ||
102 | } | 106 | } |
103 | } | 107 | } |
104 | 108 | ||
@@ -109,10 +113,12 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
109 | if (in_interrupt()) { | 113 | if (in_interrupt()) { |
110 | crypto_cipher_decrypt_one(ctx->fallback, dst, src); | 114 | crypto_cipher_decrypt_one(ctx->fallback, dst, src); |
111 | } else { | 115 | } else { |
116 | preempt_disable(); | ||
112 | pagefault_disable(); | 117 | pagefault_disable(); |
113 | enable_kernel_altivec(); | 118 | enable_kernel_altivec(); |
114 | aes_p8_decrypt(src, dst, &ctx->dec_key); | 119 | aes_p8_decrypt(src, dst, &ctx->dec_key); |
115 | pagefault_enable(); | 120 | pagefault_enable(); |
121 | preempt_enable(); | ||
116 | } | 122 | } |
117 | } | 123 | } |
118 | 124 | ||
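The drivers/crypto/vmx changes repeat one pattern: with pagefault_disable() no longer holding off preemption, the AltiVec register state set up via enable_kernel_altivec() has to be pinned to the CPU explicitly. A hedged sketch of the shape these hunks converge on (the surrounding function is hypothetical):

#include <linux/preempt.h>
#include <linux/uaccess.h>
#include <asm/switch_to.h>	/* enable_kernel_altivec() on powerpc */

static void do_vmx_work(void)	/* illustrative only */
{
	preempt_disable();	/* VMX register state is per-CPU */
	pagefault_disable();	/* no sleeping faults while it is live */
	enable_kernel_altivec();

	/* ... call the AltiVec-accelerated primitive here ... */

	pagefault_enable();
	preempt_enable();
}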
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 1a559b7dddb5..477284abdd11 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c | |||
@@ -79,11 +79,13 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
79 | int ret; | 79 | int ret; |
80 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); | 80 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); |
81 | 81 | ||
82 | preempt_disable(); | ||
82 | pagefault_disable(); | 83 | pagefault_disable(); |
83 | enable_kernel_altivec(); | 84 | enable_kernel_altivec(); |
84 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); | 85 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); |
85 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); | 86 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); |
86 | pagefault_enable(); | 87 | pagefault_enable(); |
88 | preempt_enable(); | ||
87 | 89 | ||
88 | ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); | 90 | ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); |
89 | return ret; | 91 | return ret; |
@@ -106,6 +108,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, | |||
106 | if (in_interrupt()) { | 108 | if (in_interrupt()) { |
107 | ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); | 109 | ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); |
108 | } else { | 110 | } else { |
111 | preempt_disable(); | ||
109 | pagefault_disable(); | 112 | pagefault_disable(); |
110 | enable_kernel_altivec(); | 113 | enable_kernel_altivec(); |
111 | 114 | ||
@@ -119,6 +122,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, | |||
119 | } | 122 | } |
120 | 123 | ||
121 | pagefault_enable(); | 124 | pagefault_enable(); |
125 | preempt_enable(); | ||
122 | } | 126 | } |
123 | 127 | ||
124 | return ret; | 128 | return ret; |
@@ -141,6 +145,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, | |||
141 | if (in_interrupt()) { | 145 | if (in_interrupt()) { |
142 | ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); | 146 | ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); |
143 | } else { | 147 | } else { |
148 | preempt_disable(); | ||
144 | pagefault_disable(); | 149 | pagefault_disable(); |
145 | enable_kernel_altivec(); | 150 | enable_kernel_altivec(); |
146 | 151 | ||
@@ -154,6 +159,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, | |||
154 | } | 159 | } |
155 | 160 | ||
156 | pagefault_enable(); | 161 | pagefault_enable(); |
162 | preempt_enable(); | ||
157 | } | 163 | } |
158 | 164 | ||
159 | return ret; | 165 | return ret; |
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index d0ffe277af5c..f255ec4a04d4 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c | |||
@@ -114,11 +114,13 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, | |||
114 | if (keylen != GHASH_KEY_LEN) | 114 | if (keylen != GHASH_KEY_LEN) |
115 | return -EINVAL; | 115 | return -EINVAL; |
116 | 116 | ||
117 | preempt_disable(); | ||
117 | pagefault_disable(); | 118 | pagefault_disable(); |
118 | enable_kernel_altivec(); | 119 | enable_kernel_altivec(); |
119 | enable_kernel_fp(); | 120 | enable_kernel_fp(); |
120 | gcm_init_p8(ctx->htable, (const u64 *) key); | 121 | gcm_init_p8(ctx->htable, (const u64 *) key); |
121 | pagefault_enable(); | 122 | pagefault_enable(); |
123 | preempt_enable(); | ||
122 | return crypto_shash_setkey(ctx->fallback, key, keylen); | 124 | return crypto_shash_setkey(ctx->fallback, key, keylen); |
123 | } | 125 | } |
124 | 126 | ||
@@ -140,23 +142,27 @@ static int p8_ghash_update(struct shash_desc *desc, | |||
140 | } | 142 | } |
141 | memcpy(dctx->buffer + dctx->bytes, src, | 143 | memcpy(dctx->buffer + dctx->bytes, src, |
142 | GHASH_DIGEST_SIZE - dctx->bytes); | 144 | GHASH_DIGEST_SIZE - dctx->bytes); |
145 | preempt_disable(); | ||
143 | pagefault_disable(); | 146 | pagefault_disable(); |
144 | enable_kernel_altivec(); | 147 | enable_kernel_altivec(); |
145 | enable_kernel_fp(); | 148 | enable_kernel_fp(); |
146 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, | 149 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, |
147 | GHASH_DIGEST_SIZE); | 150 | GHASH_DIGEST_SIZE); |
148 | pagefault_enable(); | 151 | pagefault_enable(); |
152 | preempt_enable(); | ||
149 | src += GHASH_DIGEST_SIZE - dctx->bytes; | 153 | src += GHASH_DIGEST_SIZE - dctx->bytes; |
150 | srclen -= GHASH_DIGEST_SIZE - dctx->bytes; | 154 | srclen -= GHASH_DIGEST_SIZE - dctx->bytes; |
151 | dctx->bytes = 0; | 155 | dctx->bytes = 0; |
152 | } | 156 | } |
153 | len = srclen & ~(GHASH_DIGEST_SIZE - 1); | 157 | len = srclen & ~(GHASH_DIGEST_SIZE - 1); |
154 | if (len) { | 158 | if (len) { |
159 | preempt_disable(); | ||
155 | pagefault_disable(); | 160 | pagefault_disable(); |
156 | enable_kernel_altivec(); | 161 | enable_kernel_altivec(); |
157 | enable_kernel_fp(); | 162 | enable_kernel_fp(); |
158 | gcm_ghash_p8(dctx->shash, ctx->htable, src, len); | 163 | gcm_ghash_p8(dctx->shash, ctx->htable, src, len); |
159 | pagefault_enable(); | 164 | pagefault_enable(); |
165 | preempt_enable(); | ||
160 | src += len; | 166 | src += len; |
161 | srclen -= len; | 167 | srclen -= len; |
162 | } | 168 | } |
@@ -180,12 +186,14 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) | |||
180 | if (dctx->bytes) { | 186 | if (dctx->bytes) { |
181 | for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) | 187 | for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) |
182 | dctx->buffer[i] = 0; | 188 | dctx->buffer[i] = 0; |
189 | preempt_disable(); | ||
183 | pagefault_disable(); | 190 | pagefault_disable(); |
184 | enable_kernel_altivec(); | 191 | enable_kernel_altivec(); |
185 | enable_kernel_fp(); | 192 | enable_kernel_fp(); |
186 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, | 193 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, |
187 | GHASH_DIGEST_SIZE); | 194 | GHASH_DIGEST_SIZE); |
188 | pagefault_enable(); | 195 | pagefault_enable(); |
196 | preempt_enable(); | ||
189 | dctx->bytes = 0; | 197 | dctx->bytes = 0; |
190 | } | 198 | } |
191 | memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); | 199 | memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); |
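All three crypto/vmx hunks above (aes.c, aes_cbc.c, ghash.c) apply the same fix: once pagefault_disable() stops disabling preemption, the AltiVec sections need their own preempt_disable()/preempt_enable() pair so the vector state enabled by enable_kernel_altivec() cannot be lost to a context switch. A minimal sketch of the resulting pattern (p8_vector_op() is a stand-in for the real aes_p8_*/gcm_*_p8 calls):

    preempt_disable();        /* keep this task on the CPU whose vector unit we enable */
    pagefault_disable();      /* user accesses in this window must not sleep           */
    enable_kernel_altivec();  /* plus enable_kernel_fp() on the GHASH paths            */
    /* p8_vector_op(...); */
    pagefault_enable();
    preempt_enable();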
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e793ed4..cc552a4c1f3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "i915_trace.h" | 32 | #include "i915_trace.h" |
33 | #include "intel_drv.h" | 33 | #include "intel_drv.h" |
34 | #include <linux/dma_remapping.h> | 34 | #include <linux/dma_remapping.h> |
35 | #include <linux/uaccess.h> | ||
35 | 36 | ||
36 | #define __EXEC_OBJECT_HAS_PIN (1<<31) | 37 | #define __EXEC_OBJECT_HAS_PIN (1<<31) |
37 | #define __EXEC_OBJECT_HAS_FENCE (1<<30) | 38 | #define __EXEC_OBJECT_HAS_FENCE (1<<30) |
@@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, | |||
465 | } | 466 | } |
466 | 467 | ||
467 | /* We can't wait for rendering with pagefaults disabled */ | 468 | /* We can't wait for rendering with pagefaults disabled */ |
468 | if (obj->active && in_atomic()) | 469 | if (obj->active && pagefault_disabled()) |
469 | return -EFAULT; | 470 | return -EFAULT; |
470 | 471 | ||
471 | if (use_cpu_reloc(obj)) | 472 | if (use_cpu_reloc(obj)) |
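The i915 relocation path used in_atomic() only as a proxy for "page faults are disabled"; with the preempt count no longer touched by pagefault_disable(), that proxy breaks, so the code now asks the precise question. A hedged sketch of the guard (obj_is_active() stands in for the driver's obj->active check):

    /* Waiting for the GPU may fault and sleep; refuse if faults are disabled. */
    if (obj_is_active(obj) && pagefault_disabled())
            return -EFAULT;   /* caller falls back to a path that is allowed to fault */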
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index ed303ba3a593..3e03379e7c5d 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c | |||
@@ -63,7 +63,8 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); | |||
63 | #define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO) | 63 | #define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO) |
64 | 64 | ||
65 | #ifdef CONFIG_SMP | 65 | #ifdef CONFIG_SMP |
66 | #define for_each_sibling(i, cpu) for_each_cpu(i, cpu_sibling_mask(cpu)) | 66 | #define for_each_sibling(i, cpu) \ |
67 | for_each_cpu(i, topology_sibling_cpumask(cpu)) | ||
67 | #else | 68 | #else |
68 | #define for_each_sibling(i, cpu) for (i = 0; false; ) | 69 | #define for_each_sibling(i, cpu) for (i = 0; false; ) |
69 | #endif | 70 | #endif |
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4b00545a3ace..65944dd8bf6b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c | |||
@@ -1304,7 +1304,7 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) | |||
1304 | if (!cpumask_test_cpu(cpu, thread_mask)) { | 1304 | if (!cpumask_test_cpu(cpu, thread_mask)) { |
1305 | ++count; | 1305 | ++count; |
1306 | cpumask_or(thread_mask, thread_mask, | 1306 | cpumask_or(thread_mask, thread_mask, |
1307 | topology_thread_cpumask(cpu)); | 1307 | topology_sibling_cpumask(cpu)); |
1308 | } | 1308 | } |
1309 | } | 1309 | } |
1310 | 1310 | ||
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c index cc3ab351943e..f9262243f935 100644 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c | |||
@@ -87,7 +87,7 @@ static void cfs_cpu_core_siblings(int cpu, cpumask_t *mask) | |||
87 | /* return cpumask of HTs in the same core */ | 87 | /* return cpumask of HTs in the same core */ |
88 | static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask) | 88 | static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask) |
89 | { | 89 | { |
90 | cpumask_copy(mask, topology_thread_cpumask(cpu)); | 90 | cpumask_copy(mask, topology_sibling_cpumask(cpu)); |
91 | } | 91 | } |
92 | 92 | ||
93 | static void cfs_node_to_cpumask(int node, cpumask_t *mask) | 93 | static void cfs_node_to_cpumask(int node, cpumask_t *mask) |
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 8e61421515cb..344189ac5698 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c | |||
@@ -557,7 +557,7 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service *svc, | |||
557 | * there are. | 557 | * there are. |
558 | */ | 558 | */ |
559 | /* weight is # of HTs */ | 559 | /* weight is # of HTs */ |
560 | if (cpumask_weight(topology_thread_cpumask(0)) > 1) { | 560 | if (cpumask_weight(topology_sibling_cpumask(0)) > 1) { |
561 | /* depress thread factor for hyper-thread */ | 561 | /* depress thread factor for hyper-thread */ |
562 | factor = factor - (factor >> 1) + (factor >> 3); | 562 | factor = factor - (factor >> 1) + (factor >> 3); |
563 | } | 563 | } |
@@ -2768,7 +2768,7 @@ int ptlrpc_hr_init(void) | |||
2768 | 2768 | ||
2769 | init_waitqueue_head(&ptlrpc_hr.hr_waitq); | 2769 | init_waitqueue_head(&ptlrpc_hr.hr_waitq); |
2770 | 2770 | ||
2771 | weight = cpumask_weight(topology_thread_cpumask(0)); | 2771 | weight = cpumask_weight(topology_sibling_cpumask(0)); |
2772 | 2772 | ||
2773 | cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { | 2773 | cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { |
2774 | hrp->hrp_cpt = i; | 2774 | hrp->hrp_cpt = i; |
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index b59b5a52637e..e56272c919b5 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h | |||
@@ -8,8 +8,7 @@ | |||
8 | #ifndef CONFIG_SMP | 8 | #ifndef CONFIG_SMP |
9 | /* | 9 | /* |
10 | * The following implementation only for uniprocessor machines. | 10 | * The following implementation only for uniprocessor machines. |
11 | * For UP, it's relies on the fact that pagefault_disable() also disables | 11 | * It relies on preempt_disable() ensuring mutual exclusion. |
12 | * preemption to ensure mutual exclusion. | ||
13 | * | 12 | * |
14 | */ | 13 | */ |
15 | 14 | ||
@@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
38 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | 37 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) |
39 | oparg = 1 << oparg; | 38 | oparg = 1 << oparg; |
40 | 39 | ||
40 | preempt_disable(); | ||
41 | pagefault_disable(); | 41 | pagefault_disable(); |
42 | 42 | ||
43 | ret = -EFAULT; | 43 | ret = -EFAULT; |
@@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
72 | 72 | ||
73 | out_pagefault_enable: | 73 | out_pagefault_enable: |
74 | pagefault_enable(); | 74 | pagefault_enable(); |
75 | preempt_enable(); | ||
75 | 76 | ||
76 | if (ret == 0) { | 77 | if (ret == 0) { |
77 | switch (cmp) { | 78 | switch (cmp) { |
@@ -106,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
106 | { | 107 | { |
107 | u32 val; | 108 | u32 val; |
108 | 109 | ||
110 | preempt_disable(); | ||
109 | if (unlikely(get_user(val, uaddr) != 0)) | 111 | if (unlikely(get_user(val, uaddr) != 0)) |
110 | return -EFAULT; | 112 | return -EFAULT; |
111 | 113 | ||
@@ -113,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
113 | return -EFAULT; | 115 | return -EFAULT; |
114 | 116 | ||
115 | *uval = val; | 117 | *uval = val; |
118 | preempt_enable(); | ||
116 | 119 | ||
117 | return 0; | 120 | return 0; |
118 | } | 121 | } |
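On UP the generic futex ops previously leaned on the side effect that pagefault_disable() bumped the preempt count; with that side effect gone, mutual exclusion has to come from an explicit preempt_disable(), as the updated comment says. A minimal sketch of the idea, assuming a uniprocessor kernel (futex_up_add() is a hypothetical helper, not part of this patch):

    static int futex_up_add(u32 __user *uaddr, u32 inc)
    {
            u32 val;
            int ret = -EFAULT;

            preempt_disable();            /* no other task can run the same RMW */
            pagefault_disable();          /* faults go straight to the fixup    */
            if (__get_user(val, uaddr) == 0 &&
                __put_user(val + inc, uaddr) == 0)
                    ret = 0;
            pagefault_enable();
            preempt_enable();
            return ret;
    }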
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index eb6f9e6c3075..d0a7a4753db2 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h | |||
@@ -79,11 +79,8 @@ static __always_inline bool should_resched(void) | |||
79 | #ifdef CONFIG_PREEMPT | 79 | #ifdef CONFIG_PREEMPT |
80 | extern asmlinkage void preempt_schedule(void); | 80 | extern asmlinkage void preempt_schedule(void); |
81 | #define __preempt_schedule() preempt_schedule() | 81 | #define __preempt_schedule() preempt_schedule() |
82 | 82 | extern asmlinkage void preempt_schedule_notrace(void); | |
83 | #ifdef CONFIG_CONTEXT_TRACKING | 83 | #define __preempt_schedule_notrace() preempt_schedule_notrace() |
84 | extern asmlinkage void preempt_schedule_context(void); | ||
85 | #define __preempt_schedule_context() preempt_schedule_context() | ||
86 | #endif | ||
87 | #endif /* CONFIG_PREEMPT */ | 84 | #endif /* CONFIG_PREEMPT */ |
88 | 85 | ||
89 | #endif /* __ASM_PREEMPT_H */ | 86 | #endif /* __ASM_PREEMPT_H */ |
diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 86c12c93e3cf..8fdcb783197d 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _LINUX_BH_H | 2 | #define _LINUX_BH_H |
3 | 3 | ||
4 | #include <linux/preempt.h> | 4 | #include <linux/preempt.h> |
5 | #include <linux/preempt_mask.h> | ||
6 | 5 | ||
7 | #ifdef CONFIG_TRACE_IRQFLAGS | 6 | #ifdef CONFIG_TRACE_IRQFLAGS |
8 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | 7 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); |
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f4af03404b97..dfd59d6bc6f0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef LINUX_HARDIRQ_H | 1 | #ifndef LINUX_HARDIRQ_H |
2 | #define LINUX_HARDIRQ_H | 2 | #define LINUX_HARDIRQ_H |
3 | 3 | ||
4 | #include <linux/preempt_mask.h> | 4 | #include <linux/preempt.h> |
5 | #include <linux/lockdep.h> | 5 | #include <linux/lockdep.h> |
6 | #include <linux/ftrace_irq.h> | 6 | #include <linux/ftrace_irq.h> |
7 | #include <linux/vtime.h> | 7 | #include <linux/vtime.h> |
diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 9286a46b7d69..6aefcd0031a6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h | |||
@@ -65,6 +65,7 @@ static inline void kunmap(struct page *page) | |||
65 | 65 | ||
66 | static inline void *kmap_atomic(struct page *page) | 66 | static inline void *kmap_atomic(struct page *page) |
67 | { | 67 | { |
68 | preempt_disable(); | ||
68 | pagefault_disable(); | 69 | pagefault_disable(); |
69 | return page_address(page); | 70 | return page_address(page); |
70 | } | 71 | } |
@@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page) | |||
73 | static inline void __kunmap_atomic(void *addr) | 74 | static inline void __kunmap_atomic(void *addr) |
74 | { | 75 | { |
75 | pagefault_enable(); | 76 | pagefault_enable(); |
77 | preempt_enable(); | ||
76 | } | 78 | } |
77 | 79 | ||
78 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | 80 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) |
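Even without CONFIG_HIGHMEM, kmap_atomic() must keep its callers in an atomic, per-CPU section, so the preempt_disable() is added explicitly now that pagefault_disable() no longer provides it. Usage is unchanged, for example:

    void *vaddr = kmap_atomic(page);   /* enters an atomic, non-sleeping section   */
    memset(vaddr, 0, PAGE_SIZE);
    kunmap_atomic(vaddr);              /* re-enables page faults and preemption    */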
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 696d22312b31..bb9b075f0eb0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -50,9 +50,8 @@ extern struct fs_struct init_fs; | |||
50 | .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ | 50 | .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ |
51 | .rlim = INIT_RLIMITS, \ | 51 | .rlim = INIT_RLIMITS, \ |
52 | .cputimer = { \ | 52 | .cputimer = { \ |
53 | .cputime = INIT_CPUTIME, \ | 53 | .cputime_atomic = INIT_CPUTIME_ATOMIC, \ |
54 | .running = 0, \ | 54 | .running = 0, \ |
55 | .lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ | ||
56 | }, \ | 55 | }, \ |
57 | .cred_guard_mutex = \ | 56 | .cred_guard_mutex = \ |
58 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ | 57 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ |
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 657fab4efab3..c27dde7215b5 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h | |||
@@ -141,6 +141,7 @@ static inline void __iomem * | |||
141 | io_mapping_map_atomic_wc(struct io_mapping *mapping, | 141 | io_mapping_map_atomic_wc(struct io_mapping *mapping, |
142 | unsigned long offset) | 142 | unsigned long offset) |
143 | { | 143 | { |
144 | preempt_disable(); | ||
144 | pagefault_disable(); | 145 | pagefault_disable(); |
145 | return ((char __force __iomem *) mapping) + offset; | 146 | return ((char __force __iomem *) mapping) + offset; |
146 | } | 147 | } |
@@ -149,6 +150,7 @@ static inline void | |||
149 | io_mapping_unmap_atomic(void __iomem *vaddr) | 150 | io_mapping_unmap_atomic(void __iomem *vaddr) |
150 | { | 151 | { |
151 | pagefault_enable(); | 152 | pagefault_enable(); |
153 | preempt_enable(); | ||
152 | } | 154 | } |
153 | 155 | ||
154 | /* Non-atomic map/unmap */ | 156 | /* Non-atomic map/unmap */ |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3a5b48e52a9e..060dd7b61c6d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) | |||
244 | 244 | ||
245 | #if defined(CONFIG_MMU) && \ | 245 | #if defined(CONFIG_MMU) && \ |
246 | (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) | 246 | (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) |
247 | void might_fault(void); | 247 | #define might_fault() __might_fault(__FILE__, __LINE__) |
248 | void __might_fault(const char *file, int line); | ||
248 | #else | 249 | #else |
249 | static inline void might_fault(void) { } | 250 | static inline void might_fault(void) { } |
250 | #endif | 251 | #endif |
diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0081f000e34b..c92ebd100d9b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h | |||
@@ -52,10 +52,15 @@ struct lglock { | |||
52 | static struct lglock name = { .lock = &name ## _lock } | 52 | static struct lglock name = { .lock = &name ## _lock } |
53 | 53 | ||
54 | void lg_lock_init(struct lglock *lg, char *name); | 54 | void lg_lock_init(struct lglock *lg, char *name); |
55 | |||
55 | void lg_local_lock(struct lglock *lg); | 56 | void lg_local_lock(struct lglock *lg); |
56 | void lg_local_unlock(struct lglock *lg); | 57 | void lg_local_unlock(struct lglock *lg); |
57 | void lg_local_lock_cpu(struct lglock *lg, int cpu); | 58 | void lg_local_lock_cpu(struct lglock *lg, int cpu); |
58 | void lg_local_unlock_cpu(struct lglock *lg, int cpu); | 59 | void lg_local_unlock_cpu(struct lglock *lg, int cpu); |
60 | |||
61 | void lg_double_lock(struct lglock *lg, int cpu1, int cpu2); | ||
62 | void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); | ||
63 | |||
59 | void lg_global_lock(struct lglock *lg); | 64 | void lg_global_lock(struct lglock *lg); |
60 | void lg_global_unlock(struct lglock *lg); | 65 | void lg_global_unlock(struct lglock *lg); |
61 | 66 | ||
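lg_double_lock()/lg_double_unlock() are new entry points used by the stop_two_cpus() deadlock fix in this series. A hedged sketch of the behaviour they need to provide (lockdep annotations omitted): always take the two per-CPU locks in a fixed, ascending cpu-number order so two concurrent callers locking the same pair in opposite order cannot deadlock:

    void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
    {
            BUG_ON(cpu1 == cpu2);
            if (cpu2 < cpu1)
                    swap(cpu1, cpu2);          /* fixed lock ordering */
            preempt_disable();
            arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
            arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
    }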
diff --git a/include/linux/preempt.h b/include/linux/preempt.h index de83b4eb1642..0f1534acaf60 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h | |||
@@ -10,13 +10,117 @@ | |||
10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * We use the MSB mostly because its available; see <linux/preempt_mask.h> for | 13 | * We put the hardirq and softirq counter into the preemption |
14 | * the other bits -- can't include that header due to inclusion hell. | 14 | * counter. The bitmask has the following meaning: |
15 | * | ||
16 | * - bits 0-7 are the preemption count (max preemption depth: 256) | ||
17 | * - bits 8-15 are the softirq count (max # of softirqs: 256) | ||
18 | * | ||
19 | * The hardirq count could in theory be the same as the number of | ||
20 | * interrupts in the system, but we run all interrupt handlers with | ||
21 | * interrupts disabled, so we cannot have nesting interrupts. Though | ||
22 | * there are a few palaeontologic drivers which reenable interrupts in | ||
23 | * the handler, so we need more than one bit here. | ||
24 | * | ||
25 | * PREEMPT_MASK: 0x000000ff | ||
26 | * SOFTIRQ_MASK: 0x0000ff00 | ||
27 | * HARDIRQ_MASK: 0x000f0000 | ||
28 | * NMI_MASK: 0x00100000 | ||
29 | * PREEMPT_ACTIVE: 0x00200000 | ||
30 | * PREEMPT_NEED_RESCHED: 0x80000000 | ||
15 | */ | 31 | */ |
32 | #define PREEMPT_BITS 8 | ||
33 | #define SOFTIRQ_BITS 8 | ||
34 | #define HARDIRQ_BITS 4 | ||
35 | #define NMI_BITS 1 | ||
36 | |||
37 | #define PREEMPT_SHIFT 0 | ||
38 | #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) | ||
39 | #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) | ||
40 | #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) | ||
41 | |||
42 | #define __IRQ_MASK(x) ((1UL << (x))-1) | ||
43 | |||
44 | #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) | ||
45 | #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | ||
46 | #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) | ||
47 | #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) | ||
48 | |||
49 | #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) | ||
50 | #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) | ||
51 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | ||
52 | #define NMI_OFFSET (1UL << NMI_SHIFT) | ||
53 | |||
54 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
55 | |||
56 | #define PREEMPT_ACTIVE_BITS 1 | ||
57 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | ||
58 | #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) | ||
59 | |||
60 | /* We use the MSB mostly because its available */ | ||
16 | #define PREEMPT_NEED_RESCHED 0x80000000 | 61 | #define PREEMPT_NEED_RESCHED 0x80000000 |
17 | 62 | ||
63 | /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ | ||
18 | #include <asm/preempt.h> | 64 | #include <asm/preempt.h> |
19 | 65 | ||
66 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | ||
67 | #define softirq_count() (preempt_count() & SOFTIRQ_MASK) | ||
68 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | ||
69 | | NMI_MASK)) | ||
70 | |||
71 | /* | ||
72 | * Are we doing bottom half or hardware interrupt processing? | ||
73 | * Are we in a softirq context? Interrupt context? | ||
74 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
75 | * in_serving_softirq - Are we currently processing softirq? | ||
76 | */ | ||
77 | #define in_irq() (hardirq_count()) | ||
78 | #define in_softirq() (softirq_count()) | ||
79 | #define in_interrupt() (irq_count()) | ||
80 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
81 | |||
82 | /* | ||
83 | * Are we in NMI context? | ||
84 | */ | ||
85 | #define in_nmi() (preempt_count() & NMI_MASK) | ||
86 | |||
87 | #if defined(CONFIG_PREEMPT_COUNT) | ||
88 | # define PREEMPT_DISABLE_OFFSET 1 | ||
89 | #else | ||
90 | # define PREEMPT_DISABLE_OFFSET 0 | ||
91 | #endif | ||
92 | |||
93 | /* | ||
94 | * The preempt_count offset needed for things like: | ||
95 | * | ||
96 | * spin_lock_bh() | ||
97 | * | ||
98 | * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and | ||
99 | * softirqs, such that unlock sequences of: | ||
100 | * | ||
101 | * spin_unlock(); | ||
102 | * local_bh_enable(); | ||
103 | * | ||
104 | * Work as expected. | ||
105 | */ | ||
106 | #define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) | ||
107 | |||
108 | /* | ||
109 | * Are we running in atomic context? WARNING: this macro cannot | ||
110 | * always detect atomic context; in particular, it cannot know about | ||
111 | * held spinlocks in non-preemptible kernels. Thus it should not be | ||
112 | * used in the general case to determine whether sleeping is possible. | ||
113 | * Do not use in_atomic() in driver code. | ||
114 | */ | ||
115 | #define in_atomic() (preempt_count() != 0) | ||
116 | |||
117 | /* | ||
118 | * Check whether we were atomic before we did preempt_disable(): | ||
119 | * (used by the scheduler) | ||
120 | */ | ||
121 | #define in_atomic_preempt_off() \ | ||
122 | ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) | ||
123 | |||
20 | #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) | 124 | #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) |
21 | extern void preempt_count_add(int val); | 125 | extern void preempt_count_add(int val); |
22 | extern void preempt_count_sub(int val); | 126 | extern void preempt_count_sub(int val); |
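A worked example of the counter layout documented above: a CPU that is serving a softirq and has additionally done preempt_disable() twice sees

    preempt_count() == 2*PREEMPT_OFFSET + SOFTIRQ_OFFSET == 0x00000102

which the helpers decode as:

    /* 0x102 & PREEMPT_MASK == 0x002  -> two nested preempt_disable()s        */
    /* 0x102 & SOFTIRQ_MASK == 0x100  -> in_softirq(), in_serving_softirq()   */
    /* 0x102 & HARDIRQ_MASK == 0x000  -> !in_irq()                            */
    /* so in_interrupt() and in_atomic() are true, while in_nmi() is false    */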
@@ -33,6 +137,18 @@ extern void preempt_count_sub(int val); | |||
33 | #define preempt_count_inc() preempt_count_add(1) | 137 | #define preempt_count_inc() preempt_count_add(1) |
34 | #define preempt_count_dec() preempt_count_sub(1) | 138 | #define preempt_count_dec() preempt_count_sub(1) |
35 | 139 | ||
140 | #define preempt_active_enter() \ | ||
141 | do { \ | ||
142 | preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ | ||
143 | barrier(); \ | ||
144 | } while (0) | ||
145 | |||
146 | #define preempt_active_exit() \ | ||
147 | do { \ | ||
148 | barrier(); \ | ||
149 | preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ | ||
150 | } while (0) | ||
151 | |||
36 | #ifdef CONFIG_PREEMPT_COUNT | 152 | #ifdef CONFIG_PREEMPT_COUNT |
37 | 153 | ||
38 | #define preempt_disable() \ | 154 | #define preempt_disable() \ |
@@ -49,6 +165,8 @@ do { \ | |||
49 | 165 | ||
50 | #define preempt_enable_no_resched() sched_preempt_enable_no_resched() | 166 | #define preempt_enable_no_resched() sched_preempt_enable_no_resched() |
51 | 167 | ||
168 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | ||
169 | |||
52 | #ifdef CONFIG_PREEMPT | 170 | #ifdef CONFIG_PREEMPT |
53 | #define preempt_enable() \ | 171 | #define preempt_enable() \ |
54 | do { \ | 172 | do { \ |
@@ -57,52 +175,46 @@ do { \ | |||
57 | __preempt_schedule(); \ | 175 | __preempt_schedule(); \ |
58 | } while (0) | 176 | } while (0) |
59 | 177 | ||
178 | #define preempt_enable_notrace() \ | ||
179 | do { \ | ||
180 | barrier(); \ | ||
181 | if (unlikely(__preempt_count_dec_and_test())) \ | ||
182 | __preempt_schedule_notrace(); \ | ||
183 | } while (0) | ||
184 | |||
60 | #define preempt_check_resched() \ | 185 | #define preempt_check_resched() \ |
61 | do { \ | 186 | do { \ |
62 | if (should_resched()) \ | 187 | if (should_resched()) \ |
63 | __preempt_schedule(); \ | 188 | __preempt_schedule(); \ |
64 | } while (0) | 189 | } while (0) |
65 | 190 | ||
66 | #else | 191 | #else /* !CONFIG_PREEMPT */ |
67 | #define preempt_enable() \ | 192 | #define preempt_enable() \ |
68 | do { \ | 193 | do { \ |
69 | barrier(); \ | 194 | barrier(); \ |
70 | preempt_count_dec(); \ | 195 | preempt_count_dec(); \ |
71 | } while (0) | 196 | } while (0) |
72 | #define preempt_check_resched() do { } while (0) | ||
73 | #endif | ||
74 | |||
75 | #define preempt_disable_notrace() \ | ||
76 | do { \ | ||
77 | __preempt_count_inc(); \ | ||
78 | barrier(); \ | ||
79 | } while (0) | ||
80 | 197 | ||
81 | #define preempt_enable_no_resched_notrace() \ | 198 | #define preempt_enable_notrace() \ |
82 | do { \ | 199 | do { \ |
83 | barrier(); \ | 200 | barrier(); \ |
84 | __preempt_count_dec(); \ | 201 | __preempt_count_dec(); \ |
85 | } while (0) | 202 | } while (0) |
86 | 203 | ||
87 | #ifdef CONFIG_PREEMPT | 204 | #define preempt_check_resched() do { } while (0) |
88 | 205 | #endif /* CONFIG_PREEMPT */ | |
89 | #ifndef CONFIG_CONTEXT_TRACKING | ||
90 | #define __preempt_schedule_context() __preempt_schedule() | ||
91 | #endif | ||
92 | 206 | ||
93 | #define preempt_enable_notrace() \ | 207 | #define preempt_disable_notrace() \ |
94 | do { \ | 208 | do { \ |
209 | __preempt_count_inc(); \ | ||
95 | barrier(); \ | 210 | barrier(); \ |
96 | if (unlikely(__preempt_count_dec_and_test())) \ | ||
97 | __preempt_schedule_context(); \ | ||
98 | } while (0) | 211 | } while (0) |
99 | #else | 212 | |
100 | #define preempt_enable_notrace() \ | 213 | #define preempt_enable_no_resched_notrace() \ |
101 | do { \ | 214 | do { \ |
102 | barrier(); \ | 215 | barrier(); \ |
103 | __preempt_count_dec(); \ | 216 | __preempt_count_dec(); \ |
104 | } while (0) | 217 | } while (0) |
105 | #endif | ||
106 | 218 | ||
107 | #else /* !CONFIG_PREEMPT_COUNT */ | 219 | #else /* !CONFIG_PREEMPT_COUNT */ |
108 | 220 | ||
@@ -121,6 +233,7 @@ do { \ | |||
121 | #define preempt_disable_notrace() barrier() | 233 | #define preempt_disable_notrace() barrier() |
122 | #define preempt_enable_no_resched_notrace() barrier() | 234 | #define preempt_enable_no_resched_notrace() barrier() |
123 | #define preempt_enable_notrace() barrier() | 235 | #define preempt_enable_notrace() barrier() |
236 | #define preemptible() 0 | ||
124 | 237 | ||
125 | #endif /* CONFIG_PREEMPT_COUNT */ | 238 | #endif /* CONFIG_PREEMPT_COUNT */ |
126 | 239 | ||
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h deleted file mode 100644 index dbeec4d4a3be..000000000000 --- a/include/linux/preempt_mask.h +++ /dev/null | |||
@@ -1,117 +0,0 @@ | |||
1 | #ifndef LINUX_PREEMPT_MASK_H | ||
2 | #define LINUX_PREEMPT_MASK_H | ||
3 | |||
4 | #include <linux/preempt.h> | ||
5 | |||
6 | /* | ||
7 | * We put the hardirq and softirq counter into the preemption | ||
8 | * counter. The bitmask has the following meaning: | ||
9 | * | ||
10 | * - bits 0-7 are the preemption count (max preemption depth: 256) | ||
11 | * - bits 8-15 are the softirq count (max # of softirqs: 256) | ||
12 | * | ||
13 | * The hardirq count could in theory be the same as the number of | ||
14 | * interrupts in the system, but we run all interrupt handlers with | ||
15 | * interrupts disabled, so we cannot have nesting interrupts. Though | ||
16 | * there are a few palaeontologic drivers which reenable interrupts in | ||
17 | * the handler, so we need more than one bit here. | ||
18 | * | ||
19 | * PREEMPT_MASK: 0x000000ff | ||
20 | * SOFTIRQ_MASK: 0x0000ff00 | ||
21 | * HARDIRQ_MASK: 0x000f0000 | ||
22 | * NMI_MASK: 0x00100000 | ||
23 | * PREEMPT_ACTIVE: 0x00200000 | ||
24 | */ | ||
25 | #define PREEMPT_BITS 8 | ||
26 | #define SOFTIRQ_BITS 8 | ||
27 | #define HARDIRQ_BITS 4 | ||
28 | #define NMI_BITS 1 | ||
29 | |||
30 | #define PREEMPT_SHIFT 0 | ||
31 | #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) | ||
32 | #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) | ||
33 | #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) | ||
34 | |||
35 | #define __IRQ_MASK(x) ((1UL << (x))-1) | ||
36 | |||
37 | #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) | ||
38 | #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | ||
39 | #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) | ||
40 | #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) | ||
41 | |||
42 | #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) | ||
43 | #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) | ||
44 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | ||
45 | #define NMI_OFFSET (1UL << NMI_SHIFT) | ||
46 | |||
47 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
48 | |||
49 | #define PREEMPT_ACTIVE_BITS 1 | ||
50 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | ||
51 | #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) | ||
52 | |||
53 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | ||
54 | #define softirq_count() (preempt_count() & SOFTIRQ_MASK) | ||
55 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | ||
56 | | NMI_MASK)) | ||
57 | |||
58 | /* | ||
59 | * Are we doing bottom half or hardware interrupt processing? | ||
60 | * Are we in a softirq context? Interrupt context? | ||
61 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
62 | * in_serving_softirq - Are we currently processing softirq? | ||
63 | */ | ||
64 | #define in_irq() (hardirq_count()) | ||
65 | #define in_softirq() (softirq_count()) | ||
66 | #define in_interrupt() (irq_count()) | ||
67 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
68 | |||
69 | /* | ||
70 | * Are we in NMI context? | ||
71 | */ | ||
72 | #define in_nmi() (preempt_count() & NMI_MASK) | ||
73 | |||
74 | #if defined(CONFIG_PREEMPT_COUNT) | ||
75 | # define PREEMPT_CHECK_OFFSET 1 | ||
76 | #else | ||
77 | # define PREEMPT_CHECK_OFFSET 0 | ||
78 | #endif | ||
79 | |||
80 | /* | ||
81 | * The preempt_count offset needed for things like: | ||
82 | * | ||
83 | * spin_lock_bh() | ||
84 | * | ||
85 | * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and | ||
86 | * softirqs, such that unlock sequences of: | ||
87 | * | ||
88 | * spin_unlock(); | ||
89 | * local_bh_enable(); | ||
90 | * | ||
91 | * Work as expected. | ||
92 | */ | ||
93 | #define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) | ||
94 | |||
95 | /* | ||
96 | * Are we running in atomic context? WARNING: this macro cannot | ||
97 | * always detect atomic context; in particular, it cannot know about | ||
98 | * held spinlocks in non-preemptible kernels. Thus it should not be | ||
99 | * used in the general case to determine whether sleeping is possible. | ||
100 | * Do not use in_atomic() in driver code. | ||
101 | */ | ||
102 | #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) | ||
103 | |||
104 | /* | ||
105 | * Check whether we were atomic before we did preempt_disable(): | ||
106 | * (used by the scheduler, *after* releasing the kernel lock) | ||
107 | */ | ||
108 | #define in_atomic_preempt_off() \ | ||
109 | ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) | ||
110 | |||
111 | #ifdef CONFIG_PREEMPT_COUNT | ||
112 | # define preemptible() (preempt_count() == 0 && !irqs_disabled()) | ||
113 | #else | ||
114 | # define preemptible() 0 | ||
115 | #endif | ||
116 | |||
117 | #endif /* LINUX_PREEMPT_MASK_H */ | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 659f5729cacc..d4193d5613cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -25,7 +25,7 @@ struct sched_param { | |||
25 | #include <linux/errno.h> | 25 | #include <linux/errno.h> |
26 | #include <linux/nodemask.h> | 26 | #include <linux/nodemask.h> |
27 | #include <linux/mm_types.h> | 27 | #include <linux/mm_types.h> |
28 | #include <linux/preempt_mask.h> | 28 | #include <linux/preempt.h> |
29 | 29 | ||
30 | #include <asm/page.h> | 30 | #include <asm/page.h> |
31 | #include <asm/ptrace.h> | 31 | #include <asm/ptrace.h> |
@@ -174,7 +174,12 @@ extern unsigned long nr_iowait_cpu(int cpu); | |||
174 | extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); | 174 | extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); |
175 | 175 | ||
176 | extern void calc_global_load(unsigned long ticks); | 176 | extern void calc_global_load(unsigned long ticks); |
177 | |||
178 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | ||
177 | extern void update_cpu_load_nohz(void); | 179 | extern void update_cpu_load_nohz(void); |
180 | #else | ||
181 | static inline void update_cpu_load_nohz(void) { } | ||
182 | #endif | ||
178 | 183 | ||
179 | extern unsigned long get_parent_ip(unsigned long addr); | 184 | extern unsigned long get_parent_ip(unsigned long addr); |
180 | 185 | ||
@@ -214,9 +219,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); | |||
214 | #define TASK_WAKEKILL 128 | 219 | #define TASK_WAKEKILL 128 |
215 | #define TASK_WAKING 256 | 220 | #define TASK_WAKING 256 |
216 | #define TASK_PARKED 512 | 221 | #define TASK_PARKED 512 |
217 | #define TASK_STATE_MAX 1024 | 222 | #define TASK_NOLOAD 1024 |
223 | #define TASK_STATE_MAX 2048 | ||
218 | 224 | ||
219 | #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" | 225 | #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" |
220 | 226 | ||
221 | extern char ___assert_task_state[1 - 2*!!( | 227 | extern char ___assert_task_state[1 - 2*!!( |
222 | sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; | 228 | sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; |
@@ -226,6 +232,8 @@ extern char ___assert_task_state[1 - 2*!!( | |||
226 | #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) | 232 | #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) |
227 | #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) | 233 | #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) |
228 | 234 | ||
235 | #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) | ||
236 | |||
229 | /* Convenience macros for the sake of wake_up */ | 237 | /* Convenience macros for the sake of wake_up */ |
230 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) | 238 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) |
231 | #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) | 239 | #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) |
@@ -241,7 +249,8 @@ extern char ___assert_task_state[1 - 2*!!( | |||
241 | ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | 249 | ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) |
242 | #define task_contributes_to_load(task) \ | 250 | #define task_contributes_to_load(task) \ |
243 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | 251 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ |
244 | (task->flags & PF_FROZEN) == 0) | 252 | (task->flags & PF_FROZEN) == 0 && \ |
253 | (task->state & TASK_NOLOAD) == 0) | ||
245 | 254 | ||
246 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | 255 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
247 | 256 | ||
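TASK_NOLOAD lets a task sleep uninterruptibly without being counted in the load average, and TASK_IDLE is the convenience combination for that case; task_contributes_to_load() is extended to exclude it. A minimal sketch of the intended use in a kthread that parks itself for long stretches:

    set_current_state(TASK_IDLE);   /* == TASK_UNINTERRUPTIBLE | TASK_NOLOAD   */
    schedule();                     /* sleeps without inflating the loadavg    */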
@@ -568,6 +577,23 @@ struct task_cputime { | |||
568 | .sum_exec_runtime = 0, \ | 577 | .sum_exec_runtime = 0, \ |
569 | } | 578 | } |
570 | 579 | ||
580 | /* | ||
581 | * This is the atomic variant of task_cputime, which can be used for | ||
582 | * storing and updating task_cputime statistics without locking. | ||
583 | */ | ||
584 | struct task_cputime_atomic { | ||
585 | atomic64_t utime; | ||
586 | atomic64_t stime; | ||
587 | atomic64_t sum_exec_runtime; | ||
588 | }; | ||
589 | |||
590 | #define INIT_CPUTIME_ATOMIC \ | ||
591 | (struct task_cputime_atomic) { \ | ||
592 | .utime = ATOMIC64_INIT(0), \ | ||
593 | .stime = ATOMIC64_INIT(0), \ | ||
594 | .sum_exec_runtime = ATOMIC64_INIT(0), \ | ||
595 | } | ||
596 | |||
571 | #ifdef CONFIG_PREEMPT_COUNT | 597 | #ifdef CONFIG_PREEMPT_COUNT |
572 | #define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) | 598 | #define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) |
573 | #else | 599 | #else |
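struct task_cputime_atomic is what allows thread_group_cputimer() to drop its spinlock: each statistic can be advanced independently with 64-bit atomics. A hedged sketch of an update path (account_group_exec_sketch() is hypothetical):

    static void account_group_exec_sketch(struct task_cputime_atomic *ct, u64 ns)
    {
            atomic64_add(ns, &ct->sum_exec_runtime);   /* no cputimer lock needed */
    }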
@@ -585,18 +611,16 @@ struct task_cputime { | |||
585 | 611 | ||
586 | /** | 612 | /** |
587 | * struct thread_group_cputimer - thread group interval timer counts | 613 | * struct thread_group_cputimer - thread group interval timer counts |
588 | * @cputime: thread group interval timers. | 614 | * @cputime_atomic: atomic thread group interval timers. |
589 | * @running: non-zero when there are timers running and | 615 | * @running: non-zero when there are timers running and |
590 | * @cputime receives updates. | 616 | * @cputime receives updates. |
591 | * @lock: lock for fields in this struct. | ||
592 | * | 617 | * |
593 | * This structure contains the version of task_cputime, above, that is | 618 | * This structure contains the version of task_cputime, above, that is |
594 | * used for thread group CPU timer calculations. | 619 | * used for thread group CPU timer calculations. |
595 | */ | 620 | */ |
596 | struct thread_group_cputimer { | 621 | struct thread_group_cputimer { |
597 | struct task_cputime cputime; | 622 | struct task_cputime_atomic cputime_atomic; |
598 | int running; | 623 | int running; |
599 | raw_spinlock_t lock; | ||
600 | }; | 624 | }; |
601 | 625 | ||
602 | #include <linux/rwsem.h> | 626 | #include <linux/rwsem.h> |
@@ -901,6 +925,50 @@ enum cpu_idle_type { | |||
901 | #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) | 925 | #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) |
902 | 926 | ||
903 | /* | 927 | /* |
928 | * Wake-queues are lists of tasks with a pending wakeup, whose | ||
929 | * callers have already marked the task as woken internally, | ||
930 | * and can thus carry on. A common use case is being able to | ||
931 | * do the wakeups once the corresponding user lock has been | ||
932 | * released. | ||
933 | * | ||
934 | * We hold reference to each task in the list across the wakeup, | ||
935 | * thus guaranteeing that the memory is still valid by the time | ||
936 | * the actual wakeups are performed in wake_up_q(). | ||
937 | * | ||
938 | * One per task suffices, because there's never a need for a task to be | ||
939 | * in two wake queues simultaneously; it is forbidden to abandon a task | ||
940 | * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is | ||
941 | * already in a wake queue, the wakeup will happen soon and the second | ||
942 | * waker can just skip it. | ||
943 | * | ||
944 | * The WAKE_Q macro declares and initializes the list head. | ||
945 | * wake_up_q() does NOT reinitialize the list; it's expected to be | ||
946 | * called near the end of a function, where the fact that the queue is | ||
947 | * not used again will be easy to see by inspection. | ||
948 | * | ||
949 | * Note that this can cause spurious wakeups. schedule() callers | ||
950 | * must ensure the call is done inside a loop, confirming that the | ||
951 | * wakeup condition has in fact occurred. | ||
952 | */ | ||
953 | struct wake_q_node { | ||
954 | struct wake_q_node *next; | ||
955 | }; | ||
956 | |||
957 | struct wake_q_head { | ||
958 | struct wake_q_node *first; | ||
959 | struct wake_q_node **lastp; | ||
960 | }; | ||
961 | |||
962 | #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) | ||
963 | |||
964 | #define WAKE_Q(name) \ | ||
965 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } | ||
966 | |||
967 | extern void wake_q_add(struct wake_q_head *head, | ||
968 | struct task_struct *task); | ||
969 | extern void wake_up_q(struct wake_q_head *head); | ||
970 | |||
971 | /* | ||
904 | * sched-domains (multiprocessor balancing) declarations: | 972 | * sched-domains (multiprocessor balancing) declarations: |
905 | */ | 973 | */ |
906 | #ifdef CONFIG_SMP | 974 | #ifdef CONFIG_SMP |
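Typical wake_q usage, as described by the comment above: wakeups are queued while the lock is held and issued only after it is dropped (the lock and the waiter list walk below are assumptions for illustration):

    WAKE_Q(wake_q);                       /* declares and initializes the head   */

    spin_lock(&lock);
    /* for each waiter that should be woken: */
    wake_q_add(&wake_q, waiter->task);    /* takes a reference, marks the task   */
    spin_unlock(&lock);

    wake_up_q(&wake_q);                   /* the actual wake_up_process() calls  */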
@@ -1335,8 +1403,6 @@ struct task_struct { | |||
1335 | int rcu_read_lock_nesting; | 1403 | int rcu_read_lock_nesting; |
1336 | union rcu_special rcu_read_unlock_special; | 1404 | union rcu_special rcu_read_unlock_special; |
1337 | struct list_head rcu_node_entry; | 1405 | struct list_head rcu_node_entry; |
1338 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||
1339 | #ifdef CONFIG_PREEMPT_RCU | ||
1340 | struct rcu_node *rcu_blocked_node; | 1406 | struct rcu_node *rcu_blocked_node; |
1341 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | 1407 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
1342 | #ifdef CONFIG_TASKS_RCU | 1408 | #ifdef CONFIG_TASKS_RCU |
@@ -1367,7 +1433,7 @@ struct task_struct { | |||
1367 | int exit_state; | 1433 | int exit_state; |
1368 | int exit_code, exit_signal; | 1434 | int exit_code, exit_signal; |
1369 | int pdeath_signal; /* The signal sent when the parent dies */ | 1435 | int pdeath_signal; /* The signal sent when the parent dies */ |
1370 | unsigned int jobctl; /* JOBCTL_*, siglock protected */ | 1436 | unsigned long jobctl; /* JOBCTL_*, siglock protected */ |
1371 | 1437 | ||
1372 | /* Used for emulating ABI behavior of previous Linux versions */ | 1438 | /* Used for emulating ABI behavior of previous Linux versions */ |
1373 | unsigned int personality; | 1439 | unsigned int personality; |
@@ -1513,6 +1579,8 @@ struct task_struct { | |||
1513 | /* Protection of the PI data structures: */ | 1579 | /* Protection of the PI data structures: */ |
1514 | raw_spinlock_t pi_lock; | 1580 | raw_spinlock_t pi_lock; |
1515 | 1581 | ||
1582 | struct wake_q_node wake_q; | ||
1583 | |||
1516 | #ifdef CONFIG_RT_MUTEXES | 1584 | #ifdef CONFIG_RT_MUTEXES |
1517 | /* PI waiters blocked on a rt_mutex held by this task */ | 1585 | /* PI waiters blocked on a rt_mutex held by this task */ |
1518 | struct rb_root pi_waiters; | 1586 | struct rb_root pi_waiters; |
@@ -1726,6 +1794,7 @@ struct task_struct { | |||
1726 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | 1794 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
1727 | unsigned long task_state_change; | 1795 | unsigned long task_state_change; |
1728 | #endif | 1796 | #endif |
1797 | int pagefault_disabled; | ||
1729 | }; | 1798 | }; |
1730 | 1799 | ||
1731 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ | 1800 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ |
@@ -2079,22 +2148,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) | |||
2079 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ | 2148 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ |
2080 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ | 2149 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ |
2081 | 2150 | ||
2082 | #define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) | 2151 | #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) |
2083 | #define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) | 2152 | #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) |
2084 | #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) | 2153 | #define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) |
2085 | #define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) | 2154 | #define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) |
2086 | #define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) | 2155 | #define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) |
2087 | #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) | 2156 | #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) |
2088 | #define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) | 2157 | #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) |
2089 | 2158 | ||
2090 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) | 2159 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) |
2091 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) | 2160 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) |
2092 | 2161 | ||
2093 | extern bool task_set_jobctl_pending(struct task_struct *task, | 2162 | extern bool task_set_jobctl_pending(struct task_struct *task, |
2094 | unsigned int mask); | 2163 | unsigned long mask); |
2095 | extern void task_clear_jobctl_trapping(struct task_struct *task); | 2164 | extern void task_clear_jobctl_trapping(struct task_struct *task); |
2096 | extern void task_clear_jobctl_pending(struct task_struct *task, | 2165 | extern void task_clear_jobctl_pending(struct task_struct *task, |
2097 | unsigned int mask); | 2166 | unsigned long mask); |
2098 | 2167 | ||
2099 | static inline void rcu_copy_process(struct task_struct *p) | 2168 | static inline void rcu_copy_process(struct task_struct *p) |
2100 | { | 2169 | { |
@@ -2964,11 +3033,6 @@ static __always_inline bool need_resched(void) | |||
2964 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); | 3033 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); |
2965 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); | 3034 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); |
2966 | 3035 | ||
2967 | static inline void thread_group_cputime_init(struct signal_struct *sig) | ||
2968 | { | ||
2969 | raw_spin_lock_init(&sig->cputimer.lock); | ||
2970 | } | ||
2971 | |||
2972 | /* | 3036 | /* |
2973 | * Reevaluate whether the task has signals pending delivery. | 3037 | * Reevaluate whether the task has signals pending delivery. |
2974 | * Wake the task if so. | 3038 | * Wake the task if so. |
@@ -3082,13 +3146,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm) | |||
3082 | static inline unsigned long task_rlimit(const struct task_struct *tsk, | 3146 | static inline unsigned long task_rlimit(const struct task_struct *tsk, |
3083 | unsigned int limit) | 3147 | unsigned int limit) |
3084 | { | 3148 | { |
3085 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); | 3149 | return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); |
3086 | } | 3150 | } |
3087 | 3151 | ||
3088 | static inline unsigned long task_rlimit_max(const struct task_struct *tsk, | 3152 | static inline unsigned long task_rlimit_max(const struct task_struct *tsk, |
3089 | unsigned int limit) | 3153 | unsigned int limit) |
3090 | { | 3154 | { |
3091 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); | 3155 | return READ_ONCE(tsk->signal->rlim[limit].rlim_max); |
3092 | } | 3156 | } |
3093 | 3157 | ||
3094 | static inline unsigned long rlimit(unsigned int limit) | 3158 | static inline unsigned long rlimit(unsigned int limit) |
diff --git a/include/linux/topology.h b/include/linux/topology.h index 909b6e43b694..73ddad1e0fa3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
@@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu) | |||
191 | #ifndef topology_core_id | 191 | #ifndef topology_core_id |
192 | #define topology_core_id(cpu) ((void)(cpu), 0) | 192 | #define topology_core_id(cpu) ((void)(cpu), 0) |
193 | #endif | 193 | #endif |
194 | #ifndef topology_thread_cpumask | 194 | #ifndef topology_sibling_cpumask |
195 | #define topology_thread_cpumask(cpu) cpumask_of(cpu) | 195 | #define topology_sibling_cpumask(cpu) cpumask_of(cpu) |
196 | #endif | 196 | #endif |
197 | #ifndef topology_core_cpumask | 197 | #ifndef topology_core_cpumask |
198 | #define topology_core_cpumask(cpu) cpumask_of(cpu) | 198 | #define topology_core_cpumask(cpu) cpumask_of(cpu) |
@@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu) | |||
201 | #ifdef CONFIG_SCHED_SMT | 201 | #ifdef CONFIG_SCHED_SMT |
202 | static inline const struct cpumask *cpu_smt_mask(int cpu) | 202 | static inline const struct cpumask *cpu_smt_mask(int cpu) |
203 | { | 203 | { |
204 | return topology_thread_cpumask(cpu); | 204 | return topology_sibling_cpumask(cpu); |
205 | } | 205 | } |
206 | #endif | 206 | #endif |
207 | 207 | ||
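The driver hunks earlier in this diff (coretemp, sfc, lustre) are mechanical conversions to the new name; iterating the hardware threads that share a core now reads (the cpu variable is assumed):

    int sibling;

    for_each_cpu(sibling, topology_sibling_cpumask(cpu))
            pr_info("cpu%d shares a core with cpu%d\n", cpu, sibling);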
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ecd3319dac33..ae572c138607 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h | |||
@@ -1,21 +1,30 @@ | |||
1 | #ifndef __LINUX_UACCESS_H__ | 1 | #ifndef __LINUX_UACCESS_H__ |
2 | #define __LINUX_UACCESS_H__ | 2 | #define __LINUX_UACCESS_H__ |
3 | 3 | ||
4 | #include <linux/preempt.h> | 4 | #include <linux/sched.h> |
5 | #include <asm/uaccess.h> | 5 | #include <asm/uaccess.h> |
6 | 6 | ||
7 | static __always_inline void pagefault_disabled_inc(void) | ||
8 | { | ||
9 | current->pagefault_disabled++; | ||
10 | } | ||
11 | |||
12 | static __always_inline void pagefault_disabled_dec(void) | ||
13 | { | ||
14 | current->pagefault_disabled--; | ||
15 | WARN_ON(current->pagefault_disabled < 0); | ||
16 | } | ||
17 | |||
7 | /* | 18 | /* |
8 | * These routines enable/disable the pagefault handler in that | 19 | * These routines enable/disable the pagefault handler. If disabled, it will |
9 | * it will not take any locks and go straight to the fixup table. | 20 | * not take any locks and go straight to the fixup table. |
10 | * | 21 | * |
11 | * They have great resemblance to the preempt_disable/enable calls | 22 | * User access methods will not sleep when called from a pagefault_disabled() |
12 | * and in fact they are identical; this is because currently there is | 23 | * environment. |
13 | * no other way to make the pagefault handlers do this. So we do | ||
14 | * disable preemption but we don't necessarily care about that. | ||
15 | */ | 24 | */ |
16 | static inline void pagefault_disable(void) | 25 | static inline void pagefault_disable(void) |
17 | { | 26 | { |
18 | preempt_count_inc(); | 27 | pagefault_disabled_inc(); |
19 | /* | 28 | /* |
20 | * make sure to have issued the store before a pagefault | 29 | * make sure to have issued the store before a pagefault |
21 | * can hit. | 30 | * can hit. |
@@ -25,18 +34,31 @@ static inline void pagefault_disable(void) | |||
25 | 34 | ||
26 | static inline void pagefault_enable(void) | 35 | static inline void pagefault_enable(void) |
27 | { | 36 | { |
28 | #ifndef CONFIG_PREEMPT | ||
29 | /* | 37 | /* |
30 | * make sure to issue those last loads/stores before enabling | 38 | * make sure to issue those last loads/stores before enabling |
31 | * the pagefault handler again. | 39 | * the pagefault handler again. |
32 | */ | 40 | */ |
33 | barrier(); | 41 | barrier(); |
34 | preempt_count_dec(); | 42 | pagefault_disabled_dec(); |
35 | #else | ||
36 | preempt_enable(); | ||
37 | #endif | ||
38 | } | 43 | } |
39 | 44 | ||
45 | /* | ||
46 | * Is the pagefault handler disabled? If so, user access methods will not sleep. | ||
47 | */ | ||
48 | #define pagefault_disabled() (current->pagefault_disabled != 0) | ||
49 | |||
50 | /* | ||
51 | * The pagefault handler is in general disabled by pagefault_disable() or | ||
52 | * when in irq context (via in_atomic()). | ||
53 | * | ||
54 | * This function should only be used by the fault handlers. Other users should | ||
55 | * stick to pagefault_disabled(). | ||
56 | * Please NEVER use preempt_disable() to disable the fault handler. With | ||
57 | * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. | ||
58 | * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. | ||
59 | */ | ||
60 | #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) | ||
61 | |||
40 | #ifndef ARCH_HAS_NOCACHE_UACCESS | 62 | #ifndef ARCH_HAS_NOCACHE_UACCESS |
41 | 63 | ||
42 | static inline unsigned long __copy_from_user_inatomic_nocache(void *to, | 64 | static inline unsigned long __copy_from_user_inatomic_nocache(void *to, |
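With the counter moved into task_struct, pagefault_disable() nests naturally and no longer implies !preemptible(); fault handlers are expected to check faulthandler_disabled() rather than in_atomic(). A short sketch of a nested, non-sleeping user access (dst, usrc, len and left are assumptions):

    pagefault_disable();
    pagefault_disable();                                /* nests: counter is now 2 */
    left = __copy_from_user_inatomic(dst, usrc, len);   /* may fail, never sleeps  */
    pagefault_enable();
    pagefault_enable();                                 /* counter back to 0       */
    if (left)
            /* retry later from a context where faulting is allowed */;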
diff --git a/include/linux/wait.h b/include/linux/wait.h index 2db83349865b..d69ac4ecc88b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h | |||
@@ -969,7 +969,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *); | |||
969 | * on that signal. | 969 | * on that signal. |
970 | */ | 970 | */ |
971 | static inline int | 971 | static inline int |
972 | wait_on_bit(void *word, int bit, unsigned mode) | 972 | wait_on_bit(unsigned long *word, int bit, unsigned mode) |
973 | { | 973 | { |
974 | might_sleep(); | 974 | might_sleep(); |
975 | if (!test_bit(bit, word)) | 975 | if (!test_bit(bit, word)) |
@@ -994,7 +994,7 @@ wait_on_bit(void *word, int bit, unsigned mode) | |||
994 | * on that signal. | 994 | * on that signal. |
995 | */ | 995 | */ |
996 | static inline int | 996 | static inline int |
997 | wait_on_bit_io(void *word, int bit, unsigned mode) | 997 | wait_on_bit_io(unsigned long *word, int bit, unsigned mode) |
998 | { | 998 | { |
999 | might_sleep(); | 999 | might_sleep(); |
1000 | if (!test_bit(bit, word)) | 1000 | if (!test_bit(bit, word)) |
@@ -1020,7 +1020,8 @@ wait_on_bit_io(void *word, int bit, unsigned mode) | |||
1020 | * received a signal and the mode permitted wakeup on that signal. | 1020 | * received a signal and the mode permitted wakeup on that signal. |
1021 | */ | 1021 | */ |
1022 | static inline int | 1022 | static inline int |
1023 | wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) | 1023 | wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, |
1024 | unsigned long timeout) | ||
1024 | { | 1025 | { |
1025 | might_sleep(); | 1026 | might_sleep(); |
1026 | if (!test_bit(bit, word)) | 1027 | if (!test_bit(bit, word)) |
@@ -1047,7 +1048,8 @@ wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) | |||
1047 | * on that signal. | 1048 | * on that signal. |
1048 | */ | 1049 | */ |
1049 | static inline int | 1050 | static inline int |
1050 | wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) | 1051 | wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, |
1052 | unsigned mode) | ||
1051 | { | 1053 | { |
1052 | might_sleep(); | 1054 | might_sleep(); |
1053 | if (!test_bit(bit, word)) | 1055 | if (!test_bit(bit, word)) |
@@ -1075,7 +1077,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode | |||
1075 | * the @mode allows that signal to wake the process. | 1077 | * the @mode allows that signal to wake the process. |
1076 | */ | 1078 | */ |
1077 | static inline int | 1079 | static inline int |
1078 | wait_on_bit_lock(void *word, int bit, unsigned mode) | 1080 | wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) |
1079 | { | 1081 | { |
1080 | might_sleep(); | 1082 | might_sleep(); |
1081 | if (!test_and_set_bit(bit, word)) | 1083 | if (!test_and_set_bit(bit, word)) |
@@ -1099,7 +1101,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode) | |||
1099 | * the @mode allows that signal to wake the process. | 1101 | * the @mode allows that signal to wake the process. |
1100 | */ | 1102 | */ |
1101 | static inline int | 1103 | static inline int |
1102 | wait_on_bit_lock_io(void *word, int bit, unsigned mode) | 1104 | wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) |
1103 | { | 1105 | { |
1104 | might_sleep(); | 1106 | might_sleep(); |
1105 | if (!test_and_set_bit(bit, word)) | 1107 | if (!test_and_set_bit(bit, word)) |
@@ -1125,7 +1127,8 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode) | |||
1125 | * the @mode allows that signal to wake the process. | 1127 | * the @mode allows that signal to wake the process. |
1126 | */ | 1128 | */ |
1127 | static inline int | 1129 | static inline int |
1128 | wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) | 1130 | wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, |
1131 | unsigned mode) | ||
1129 | { | 1132 | { |
1130 | might_sleep(); | 1133 | might_sleep(); |
1131 | if (!test_and_set_bit(bit, word)) | 1134 | if (!test_and_set_bit(bit, word)) |
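Note on the wait.h hunks above: the only change is the type of the word argument, from void * to unsigned long *, matching what test_bit()/test_and_set_bit() actually operate on and letting the compiler reject callers that pass arbitrarily typed buffers. A minimal usage sketch under the new signatures; the flag word and names are illustrative, not from this patch:

#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/sched.h>

#define MY_BIT_BUSY	0			/* bit index, illustrative */

static unsigned long my_flags;			/* must now be an unsigned long word */

static int my_wait_until_idle(void)
{
	/* Sleeps until MY_BIT_BUSY is clear; a non-zero return means a
	 * signal arrived and TASK_INTERRUPTIBLE let it wake us early. */
	return wait_on_bit(&my_flags, MY_BIT_BUSY, TASK_INTERRUPTIBLE);
}

static void my_mark_idle(void)
{
	clear_bit(MY_BIT_BUSY, &my_flags);
	smp_mb__after_atomic();			/* order the clear before the wakeup check */
	wake_up_bit(&my_flags, MY_BIT_BUSY);
}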
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 30fedaf3e56a..d57a575fe31f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h | |||
@@ -147,7 +147,8 @@ TRACE_EVENT(sched_switch, | |||
147 | __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", | 147 | __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", |
148 | { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, | 148 | { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, |
149 | { 16, "Z" }, { 32, "X" }, { 64, "x" }, | 149 | { 16, "Z" }, { 32, "X" }, { 64, "x" }, |
150 | { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", | 150 | { 128, "K" }, { 256, "W" }, { 512, "P" }, |
151 | { 1024, "N" }) : "R", | ||
151 | __entry->prev_state & TASK_STATE_MAX ? "+" : "", | 152 | __entry->prev_state & TASK_STATE_MAX ? "+" : "", |
152 | __entry->next_comm, __entry->next_pid, __entry->next_prio) | 153 | __entry->next_comm, __entry->next_pid, __entry->next_prio) |
153 | ); | 154 | ); |
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3aaea7ffd077..a24ba9fe5bb8 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
@@ -47,8 +47,7 @@ | |||
47 | #define RECV 1 | 47 | #define RECV 1 |
48 | 48 | ||
49 | #define STATE_NONE 0 | 49 | #define STATE_NONE 0 |
50 | #define STATE_PENDING 1 | 50 | #define STATE_READY 1 |
51 | #define STATE_READY 2 | ||
52 | 51 | ||
53 | struct posix_msg_tree_node { | 52 | struct posix_msg_tree_node { |
54 | struct rb_node rb_node; | 53 | struct rb_node rb_node; |
@@ -571,15 +570,12 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, | |||
571 | wq_add(info, sr, ewp); | 570 | wq_add(info, sr, ewp); |
572 | 571 | ||
573 | for (;;) { | 572 | for (;;) { |
574 | set_current_state(TASK_INTERRUPTIBLE); | 573 | __set_current_state(TASK_INTERRUPTIBLE); |
575 | 574 | ||
576 | spin_unlock(&info->lock); | 575 | spin_unlock(&info->lock); |
577 | time = schedule_hrtimeout_range_clock(timeout, 0, | 576 | time = schedule_hrtimeout_range_clock(timeout, 0, |
578 | HRTIMER_MODE_ABS, CLOCK_REALTIME); | 577 | HRTIMER_MODE_ABS, CLOCK_REALTIME); |
579 | 578 | ||
580 | while (ewp->state == STATE_PENDING) | ||
581 | cpu_relax(); | ||
582 | |||
583 | if (ewp->state == STATE_READY) { | 579 | if (ewp->state == STATE_READY) { |
584 | retval = 0; | 580 | retval = 0; |
585 | goto out; | 581 | goto out; |
@@ -907,11 +903,15 @@ out_name: | |||
907 | * list of waiting receivers. A sender checks that list before adding the new | 903 | * list of waiting receivers. A sender checks that list before adding the new |
908 | * message into the message array. If there is a waiting receiver, then it | 904 | * message into the message array. If there is a waiting receiver, then it |
909 | * bypasses the message array and directly hands the message over to the | 905 | * bypasses the message array and directly hands the message over to the |
910 | * receiver. | 906 | * receiver. The receiver accepts the message and returns without grabbing the |
911 | * The receiver accepts the message and returns without grabbing the queue | 907 | * queue spinlock: |
912 | * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers | 908 | * |
913 | * are necessary. The same algorithm is used for sysv semaphores, see | 909 | * - Set pointer to message. |
914 | * ipc/sem.c for more details. | 910 | * - Queue the receiver task for later wakeup (without the info->lock). |
911 | * - Update its state to STATE_READY. Now the receiver can continue. | ||
912 | * - Wake up the process after the lock is dropped. Should the process wake up | ||
913 | * before this wakeup (due to a timeout or a signal) it will either see | ||
914 | * STATE_READY and continue or acquire the lock to check the state again. | ||
915 | * | 915 | * |
916 | * The same algorithm is used for senders. | 916 | * The same algorithm is used for senders. |
917 | */ | 917 | */ |
@@ -919,21 +919,29 @@ out_name: | |||
919 | /* pipelined_send() - send a message directly to the task waiting in | 919 | /* pipelined_send() - send a message directly to the task waiting in |
920 | * sys_mq_timedreceive() (without inserting message into a queue). | 920 | * sys_mq_timedreceive() (without inserting message into a queue). |
921 | */ | 921 | */ |
922 | static inline void pipelined_send(struct mqueue_inode_info *info, | 922 | static inline void pipelined_send(struct wake_q_head *wake_q, |
923 | struct mqueue_inode_info *info, | ||
923 | struct msg_msg *message, | 924 | struct msg_msg *message, |
924 | struct ext_wait_queue *receiver) | 925 | struct ext_wait_queue *receiver) |
925 | { | 926 | { |
926 | receiver->msg = message; | 927 | receiver->msg = message; |
927 | list_del(&receiver->list); | 928 | list_del(&receiver->list); |
928 | receiver->state = STATE_PENDING; | 929 | wake_q_add(wake_q, receiver->task); |
929 | wake_up_process(receiver->task); | 930 | /* |
930 | smp_wmb(); | 931 | * Rely on the implicit cmpxchg barrier from wake_q_add such |
932 | * that we can ensure that updating receiver->state is the last | ||
933 | * write operation: As once set, the receiver can continue, | ||
934 | * and if we don't have the reference count from the wake_q, | ||
935 | * yet, at that point we can later have a use-after-free | ||
936 | * condition and bogus wakeup. | ||
937 | */ | ||
931 | receiver->state = STATE_READY; | 938 | receiver->state = STATE_READY; |
932 | } | 939 | } |
933 | 940 | ||
934 | /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() | 941 | /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() |
935 | * gets its message and put to the queue (we have one free place for sure). */ | 942 | * gets its message and put to the queue (we have one free place for sure). */ |
936 | static inline void pipelined_receive(struct mqueue_inode_info *info) | 943 | static inline void pipelined_receive(struct wake_q_head *wake_q, |
944 | struct mqueue_inode_info *info) | ||
937 | { | 945 | { |
938 | struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND); | 946 | struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND); |
939 | 947 | ||
@@ -944,10 +952,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info) | |||
944 | } | 952 | } |
945 | if (msg_insert(sender->msg, info)) | 953 | if (msg_insert(sender->msg, info)) |
946 | return; | 954 | return; |
955 | |||
947 | list_del(&sender->list); | 956 | list_del(&sender->list); |
948 | sender->state = STATE_PENDING; | 957 | wake_q_add(wake_q, sender->task); |
949 | wake_up_process(sender->task); | ||
950 | smp_wmb(); | ||
951 | sender->state = STATE_READY; | 958 | sender->state = STATE_READY; |
952 | } | 959 | } |
953 | 960 | ||
@@ -965,6 +972,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
965 | struct timespec ts; | 972 | struct timespec ts; |
966 | struct posix_msg_tree_node *new_leaf = NULL; | 973 | struct posix_msg_tree_node *new_leaf = NULL; |
967 | int ret = 0; | 974 | int ret = 0; |
975 | WAKE_Q(wake_q); | ||
968 | 976 | ||
969 | if (u_abs_timeout) { | 977 | if (u_abs_timeout) { |
970 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); | 978 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
@@ -1049,7 +1057,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
1049 | } else { | 1057 | } else { |
1050 | receiver = wq_get_first_waiter(info, RECV); | 1058 | receiver = wq_get_first_waiter(info, RECV); |
1051 | if (receiver) { | 1059 | if (receiver) { |
1052 | pipelined_send(info, msg_ptr, receiver); | 1060 | pipelined_send(&wake_q, info, msg_ptr, receiver); |
1053 | } else { | 1061 | } else { |
1054 | /* adds message to the queue */ | 1062 | /* adds message to the queue */ |
1055 | ret = msg_insert(msg_ptr, info); | 1063 | ret = msg_insert(msg_ptr, info); |
@@ -1062,6 +1070,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
1062 | } | 1070 | } |
1063 | out_unlock: | 1071 | out_unlock: |
1064 | spin_unlock(&info->lock); | 1072 | spin_unlock(&info->lock); |
1073 | wake_up_q(&wake_q); | ||
1065 | out_free: | 1074 | out_free: |
1066 | if (ret) | 1075 | if (ret) |
1067 | free_msg(msg_ptr); | 1076 | free_msg(msg_ptr); |
@@ -1149,14 +1158,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
1149 | msg_ptr = wait.msg; | 1158 | msg_ptr = wait.msg; |
1150 | } | 1159 | } |
1151 | } else { | 1160 | } else { |
1161 | WAKE_Q(wake_q); | ||
1162 | |||
1152 | msg_ptr = msg_get(info); | 1163 | msg_ptr = msg_get(info); |
1153 | 1164 | ||
1154 | inode->i_atime = inode->i_mtime = inode->i_ctime = | 1165 | inode->i_atime = inode->i_mtime = inode->i_ctime = |
1155 | CURRENT_TIME; | 1166 | CURRENT_TIME; |
1156 | 1167 | ||
1157 | /* There is now free space in queue. */ | 1168 | /* There is now free space in queue. */ |
1158 | pipelined_receive(info); | 1169 | pipelined_receive(&wake_q, info); |
1159 | spin_unlock(&info->lock); | 1170 | spin_unlock(&info->lock); |
1171 | wake_up_q(&wake_q); | ||
1160 | ret = 0; | 1172 | ret = 0; |
1161 | } | 1173 | } |
1162 | if (ret == 0) { | 1174 | if (ret == 0) { |
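The rewritten algorithm comment earlier in this file is the heart of the change: with lockless wake-queues there is no longer an intermediate STATE_PENDING for the woken side to spin on. A simplified model of the handoff, paraphrased for illustration (it reuses the mqueue types above and assumes the usual kernel headers; it is not the mqueue code itself):

/* Sender side, called with info->lock held, cf. pipelined_send() above. */
static void hand_over(struct wake_q_head *wake_q,
		      struct ext_wait_queue *receiver, struct msg_msg *message)
{
	receiver->msg = message;		/* 1: publish the payload          */
	wake_q_add(wake_q, receiver->task);	/* 2: cmpxchg acts as full barrier */
	receiver->state = STATE_READY;		/* 3: last store, receiver may go  */
	/* caller then does: spin_unlock(&info->lock); wake_up_q(wake_q); */
}

/* Receiver side, after schedule_hrtimeout_range_clock() returns. */
static bool handed_over(struct ext_wait_queue *ewp, struct mqueue_inode_info *info)
{
	bool ready;

	if (READ_ONCE(ewp->state) == STATE_READY)	/* fast path, no lock */
		return true;
	spin_lock(&info->lock);		/* woken by timeout/signal: recheck under the lock */
	ready = (ewp->state == STATE_READY);
	spin_unlock(&info->lock);
	return ready;
}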
diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef5..0bb88b555550 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1091,10 +1091,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) | |||
1091 | { | 1091 | { |
1092 | unsigned long cpu_limit; | 1092 | unsigned long cpu_limit; |
1093 | 1093 | ||
1094 | /* Thread group counters. */ | 1094 | cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); |
1095 | thread_group_cputime_init(sig); | ||
1096 | |||
1097 | cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
1098 | if (cpu_limit != RLIM_INFINITY) { | 1095 | if (cpu_limit != RLIM_INFINITY) { |
1099 | sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); | 1096 | sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); |
1100 | sig->cputimer.running = 1; | 1097 | sig->cputimer.running = 1; |
@@ -1396,6 +1393,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1396 | p->hardirq_context = 0; | 1393 | p->hardirq_context = 0; |
1397 | p->softirq_context = 0; | 1394 | p->softirq_context = 0; |
1398 | #endif | 1395 | #endif |
1396 | |||
1397 | p->pagefault_disabled = 0; | ||
1398 | |||
1399 | #ifdef CONFIG_LOCKDEP | 1399 | #ifdef CONFIG_LOCKDEP |
1400 | p->lockdep_depth = 0; /* no locks held yet */ | 1400 | p->lockdep_depth = 0; /* no locks held yet */ |
1401 | p->curr_chain_key = 0; | 1401 | p->curr_chain_key = 0; |
diff --git a/kernel/futex.c b/kernel/futex.c index 55ca63ad9622..aacc706f85fc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1090,9 +1090,11 @@ static void __unqueue_futex(struct futex_q *q) | |||
1090 | 1090 | ||
1091 | /* | 1091 | /* |
1092 | * The hash bucket lock must be held when this is called. | 1092 | * The hash bucket lock must be held when this is called. |
1093 | * Afterwards, the futex_q must not be accessed. | 1093 | * Afterwards, the futex_q must not be accessed. Callers |
1094 | * must ensure to later call wake_up_q() for the actual | ||
1095 | * wakeups to occur. | ||
1094 | */ | 1096 | */ |
1095 | static void wake_futex(struct futex_q *q) | 1097 | static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) |
1096 | { | 1098 | { |
1097 | struct task_struct *p = q->task; | 1099 | struct task_struct *p = q->task; |
1098 | 1100 | ||
@@ -1100,14 +1102,10 @@ static void wake_futex(struct futex_q *q) | |||
1100 | return; | 1102 | return; |
1101 | 1103 | ||
1102 | /* | 1104 | /* |
1103 | * We set q->lock_ptr = NULL _before_ we wake up the task. If | 1105 | * Queue the task for later wakeup for after we've released |
1104 | * a non-futex wake up happens on another CPU then the task | 1106 | * the hb->lock. wake_q_add() grabs reference to p. |
1105 | * might exit and p would dereference a non-existing task | ||
1106 | * struct. Prevent this by holding a reference on p across the | ||
1107 | * wake up. | ||
1108 | */ | 1107 | */ |
1109 | get_task_struct(p); | 1108 | wake_q_add(wake_q, p); |
1110 | |||
1111 | __unqueue_futex(q); | 1109 | __unqueue_futex(q); |
1112 | /* | 1110 | /* |
1113 | * The waiting task can free the futex_q as soon as | 1111 | * The waiting task can free the futex_q as soon as |
@@ -1117,9 +1115,6 @@ static void wake_futex(struct futex_q *q) | |||
1117 | */ | 1115 | */ |
1118 | smp_wmb(); | 1116 | smp_wmb(); |
1119 | q->lock_ptr = NULL; | 1117 | q->lock_ptr = NULL; |
1120 | |||
1121 | wake_up_state(p, TASK_NORMAL); | ||
1122 | put_task_struct(p); | ||
1123 | } | 1118 | } |
1124 | 1119 | ||
1125 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | 1120 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) |
@@ -1217,6 +1212,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) | |||
1217 | struct futex_q *this, *next; | 1212 | struct futex_q *this, *next; |
1218 | union futex_key key = FUTEX_KEY_INIT; | 1213 | union futex_key key = FUTEX_KEY_INIT; |
1219 | int ret; | 1214 | int ret; |
1215 | WAKE_Q(wake_q); | ||
1220 | 1216 | ||
1221 | if (!bitset) | 1217 | if (!bitset) |
1222 | return -EINVAL; | 1218 | return -EINVAL; |
@@ -1244,13 +1240,14 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) | |||
1244 | if (!(this->bitset & bitset)) | 1240 | if (!(this->bitset & bitset)) |
1245 | continue; | 1241 | continue; |
1246 | 1242 | ||
1247 | wake_futex(this); | 1243 | mark_wake_futex(&wake_q, this); |
1248 | if (++ret >= nr_wake) | 1244 | if (++ret >= nr_wake) |
1249 | break; | 1245 | break; |
1250 | } | 1246 | } |
1251 | } | 1247 | } |
1252 | 1248 | ||
1253 | spin_unlock(&hb->lock); | 1249 | spin_unlock(&hb->lock); |
1250 | wake_up_q(&wake_q); | ||
1254 | out_put_key: | 1251 | out_put_key: |
1255 | put_futex_key(&key); | 1252 | put_futex_key(&key); |
1256 | out: | 1253 | out: |
@@ -1269,6 +1266,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, | |||
1269 | struct futex_hash_bucket *hb1, *hb2; | 1266 | struct futex_hash_bucket *hb1, *hb2; |
1270 | struct futex_q *this, *next; | 1267 | struct futex_q *this, *next; |
1271 | int ret, op_ret; | 1268 | int ret, op_ret; |
1269 | WAKE_Q(wake_q); | ||
1272 | 1270 | ||
1273 | retry: | 1271 | retry: |
1274 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); | 1272 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); |
@@ -1320,7 +1318,7 @@ retry_private: | |||
1320 | ret = -EINVAL; | 1318 | ret = -EINVAL; |
1321 | goto out_unlock; | 1319 | goto out_unlock; |
1322 | } | 1320 | } |
1323 | wake_futex(this); | 1321 | mark_wake_futex(&wake_q, this); |
1324 | if (++ret >= nr_wake) | 1322 | if (++ret >= nr_wake) |
1325 | break; | 1323 | break; |
1326 | } | 1324 | } |
@@ -1334,7 +1332,7 @@ retry_private: | |||
1334 | ret = -EINVAL; | 1332 | ret = -EINVAL; |
1335 | goto out_unlock; | 1333 | goto out_unlock; |
1336 | } | 1334 | } |
1337 | wake_futex(this); | 1335 | mark_wake_futex(&wake_q, this); |
1338 | if (++op_ret >= nr_wake2) | 1336 | if (++op_ret >= nr_wake2) |
1339 | break; | 1337 | break; |
1340 | } | 1338 | } |
@@ -1344,6 +1342,7 @@ retry_private: | |||
1344 | 1342 | ||
1345 | out_unlock: | 1343 | out_unlock: |
1346 | double_unlock_hb(hb1, hb2); | 1344 | double_unlock_hb(hb1, hb2); |
1345 | wake_up_q(&wake_q); | ||
1347 | out_put_keys: | 1346 | out_put_keys: |
1348 | put_futex_key(&key2); | 1347 | put_futex_key(&key2); |
1349 | out_put_key1: | 1348 | out_put_key1: |
@@ -1503,6 +1502,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
1503 | struct futex_pi_state *pi_state = NULL; | 1502 | struct futex_pi_state *pi_state = NULL; |
1504 | struct futex_hash_bucket *hb1, *hb2; | 1503 | struct futex_hash_bucket *hb1, *hb2; |
1505 | struct futex_q *this, *next; | 1504 | struct futex_q *this, *next; |
1505 | WAKE_Q(wake_q); | ||
1506 | 1506 | ||
1507 | if (requeue_pi) { | 1507 | if (requeue_pi) { |
1508 | /* | 1508 | /* |
@@ -1679,7 +1679,7 @@ retry_private: | |||
1679 | * woken by futex_unlock_pi(). | 1679 | * woken by futex_unlock_pi(). |
1680 | */ | 1680 | */ |
1681 | if (++task_count <= nr_wake && !requeue_pi) { | 1681 | if (++task_count <= nr_wake && !requeue_pi) { |
1682 | wake_futex(this); | 1682 | mark_wake_futex(&wake_q, this); |
1683 | continue; | 1683 | continue; |
1684 | } | 1684 | } |
1685 | 1685 | ||
@@ -1719,6 +1719,7 @@ retry_private: | |||
1719 | out_unlock: | 1719 | out_unlock: |
1720 | free_pi_state(pi_state); | 1720 | free_pi_state(pi_state); |
1721 | double_unlock_hb(hb1, hb2); | 1721 | double_unlock_hb(hb1, hb2); |
1722 | wake_up_q(&wake_q); | ||
1722 | hb_waiters_dec(hb2); | 1723 | hb_waiters_dec(hb2); |
1723 | 1724 | ||
1724 | /* | 1725 | /* |
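All three converted call sites (futex_wake(), futex_wake_op(), futex_requeue()) follow the same shape: mark the waiters to be woken while holding hb->lock, then issue the wakeups only after dropping it, so freshly woken tasks do not immediately contend on the hash-bucket lock. A condensed paraphrase of futex_wake(), with the key lookup and bitset filtering trimmed:

static int wake_waiters(struct futex_hash_bucket *hb, int nr_wake)
{
	struct futex_q *this, *next;
	int ret = 0;
	WAKE_Q(wake_q);				/* on-stack, context-local queue */

	spin_lock(&hb->lock);
	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		mark_wake_futex(&wake_q, this);	/* queue it, do not wake yet */
		if (++ret >= nr_wake)
			break;
	}
	spin_unlock(&hb->lock);
	wake_up_q(&wake_q);			/* actual wakeups, lock already dropped */
	return ret;
}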
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c index 86ae2aebf004..951cfcd10b4a 100644 --- a/kernel/locking/lglock.c +++ b/kernel/locking/lglock.c | |||
@@ -60,6 +60,28 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu) | |||
60 | } | 60 | } |
61 | EXPORT_SYMBOL(lg_local_unlock_cpu); | 61 | EXPORT_SYMBOL(lg_local_unlock_cpu); |
62 | 62 | ||
63 | void lg_double_lock(struct lglock *lg, int cpu1, int cpu2) | ||
64 | { | ||
65 | BUG_ON(cpu1 == cpu2); | ||
66 | |||
67 | /* lock in cpu order, just like lg_global_lock */ | ||
68 | if (cpu2 < cpu1) | ||
69 | swap(cpu1, cpu2); | ||
70 | |||
71 | preempt_disable(); | ||
72 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | ||
73 | arch_spin_lock(per_cpu_ptr(lg->lock, cpu1)); | ||
74 | arch_spin_lock(per_cpu_ptr(lg->lock, cpu2)); | ||
75 | } | ||
76 | |||
77 | void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2) | ||
78 | { | ||
79 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
80 | arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1)); | ||
81 | arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2)); | ||
82 | preempt_enable(); | ||
83 | } | ||
84 | |||
63 | void lg_global_lock(struct lglock *lg) | 85 | void lg_global_lock(struct lglock *lg) |
64 | { | 86 | { |
65 | int i; | 87 | int i; |
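lg_double_lock() always acquires the two per-CPU locks in ascending CPU order, the same order lg_global_lock() walks them in, so two concurrent callers can never each hold one lock of the pair while spinning on the other. Illustrative only, not from this patch:

static void caller_a(struct lglock *lg)
{
	lg_double_lock(lg, 1, 3);	/* acquires cpu 1, then cpu 3 */
	/* ... work on both CPUs' per-cpu data ... */
	lg_double_unlock(lg, 1, 3);
}

static void caller_b(struct lglock *lg)
{
	lg_double_lock(lg, 3, 1);	/* swap(): still cpu 1, then cpu 3 */
	/* ... */
	lg_double_unlock(lg, 3, 1);
}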
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 46be87024875..67687973ce80 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile | |||
@@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | |||
11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer | 11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer |
12 | endif | 12 | endif |
13 | 13 | ||
14 | obj-y += core.o proc.o clock.o cputime.o | 14 | obj-y += core.o loadavg.o clock.o cputime.o |
15 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | 15 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o |
16 | obj-y += wait.o completion.o idle.o | 16 | obj-y += wait.o completion.o idle.o |
17 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | 17 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o |
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index eae160dd669d..750ed601ddf7 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
@@ -1,5 +1,3 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2 | |||
3 | #include "sched.h" | 1 | #include "sched.h" |
4 | 2 | ||
5 | #include <linux/proc_fs.h> | 3 | #include <linux/proc_fs.h> |
@@ -141,7 +139,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) | |||
141 | 139 | ||
142 | p->signal->autogroup = autogroup_kref_get(ag); | 140 | p->signal->autogroup = autogroup_kref_get(ag); |
143 | 141 | ||
144 | if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled)) | 142 | if (!READ_ONCE(sysctl_sched_autogroup_enabled)) |
145 | goto out; | 143 | goto out; |
146 | 144 | ||
147 | for_each_thread(p, t) | 145 | for_each_thread(p, t) |
@@ -249,5 +247,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen) | |||
249 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | 247 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
250 | } | 248 | } |
251 | #endif /* CONFIG_SCHED_DEBUG */ | 249 | #endif /* CONFIG_SCHED_DEBUG */ |
252 | |||
253 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h index 8bd047142816..890c95f2587a 100644 --- a/kernel/sched/auto_group.h +++ b/kernel/sched/auto_group.h | |||
@@ -29,7 +29,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg); | |||
29 | static inline struct task_group * | 29 | static inline struct task_group * |
30 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 30 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
31 | { | 31 | { |
32 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | 32 | int enabled = READ_ONCE(sysctl_sched_autogroup_enabled); |
33 | 33 | ||
34 | if (enabled && task_wants_autogroup(p, tg)) | 34 | if (enabled && task_wants_autogroup(p, tg)) |
35 | return p->signal->autogroup->tg; | 35 | return p->signal->autogroup->tg; |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index db9b10a78d74..f89ca9bcf42a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -511,7 +511,7 @@ static bool set_nr_and_not_polling(struct task_struct *p) | |||
511 | static bool set_nr_if_polling(struct task_struct *p) | 511 | static bool set_nr_if_polling(struct task_struct *p) |
512 | { | 512 | { |
513 | struct thread_info *ti = task_thread_info(p); | 513 | struct thread_info *ti = task_thread_info(p); |
514 | typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags); | 514 | typeof(ti->flags) old, val = READ_ONCE(ti->flags); |
515 | 515 | ||
516 | for (;;) { | 516 | for (;;) { |
517 | if (!(val & _TIF_POLLING_NRFLAG)) | 517 | if (!(val & _TIF_POLLING_NRFLAG)) |
@@ -541,6 +541,52 @@ static bool set_nr_if_polling(struct task_struct *p) | |||
541 | #endif | 541 | #endif |
542 | #endif | 542 | #endif |
543 | 543 | ||
544 | void wake_q_add(struct wake_q_head *head, struct task_struct *task) | ||
545 | { | ||
546 | struct wake_q_node *node = &task->wake_q; | ||
547 | |||
548 | /* | ||
549 | * Atomically grab the task, if ->wake_q is !nil already it means | ||
550 | * its already queued (either by us or someone else) and will get the | ||
551 | * wakeup due to that. | ||
552 | * | ||
553 | * This cmpxchg() implies a full barrier, which pairs with the write | ||
554 | * barrier implied by the wakeup in wake_up_list(). | ||
555 | */ | ||
556 | if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) | ||
557 | return; | ||
558 | |||
559 | get_task_struct(task); | ||
560 | |||
561 | /* | ||
562 | * The head is context local, there can be no concurrency. | ||
563 | */ | ||
564 | *head->lastp = node; | ||
565 | head->lastp = &node->next; | ||
566 | } | ||
567 | |||
568 | void wake_up_q(struct wake_q_head *head) | ||
569 | { | ||
570 | struct wake_q_node *node = head->first; | ||
571 | |||
572 | while (node != WAKE_Q_TAIL) { | ||
573 | struct task_struct *task; | ||
574 | |||
575 | task = container_of(node, struct task_struct, wake_q); | ||
576 | BUG_ON(!task); | ||
577 | /* task can safely be re-inserted now */ | ||
578 | node = node->next; | ||
579 | task->wake_q.next = NULL; | ||
580 | |||
581 | /* | ||
582 | * wake_up_process() implies a wmb() to pair with the queueing | ||
583 | * in wake_q_add() so as not to miss wakeups. | ||
584 | */ | ||
585 | wake_up_process(task); | ||
586 | put_task_struct(task); | ||
587 | } | ||
588 | } | ||
589 | |||
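For reference, the contract these two helpers establish, shown with a hypothetical caller (not from this patch): wake_q_add() may be handed a task that is already queued, in which case the cmpxchg on wake_q.next sees a non-NULL value and the call is a no-op; wake_up_q() then wakes each queued task exactly once and drops the reference the successful add took.

static void wake_two(struct task_struct *a, struct task_struct *b)
{
	WAKE_Q(q);			/* on-stack head, no locking required */

	wake_q_add(&q, a);		/* takes a reference on a */
	wake_q_add(&q, b);
	wake_q_add(&q, a);		/* duplicate: ->next already set, ignored */

	/* Typically called after dropping whatever lock protected the
	 * decision to wake; the queue itself is context local. */
	wake_up_q(&q);			/* wakes a and b once each, puts the refs */
}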
544 | /* | 590 | /* |
545 | * resched_curr - mark rq's current task 'to be rescheduled now'. | 591 | * resched_curr - mark rq's current task 'to be rescheduled now'. |
546 | * | 592 | * |
@@ -2105,12 +2151,15 @@ void wake_up_new_task(struct task_struct *p) | |||
2105 | 2151 | ||
2106 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2152 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2107 | 2153 | ||
2154 | static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE; | ||
2155 | |||
2108 | /** | 2156 | /** |
2109 | * preempt_notifier_register - tell me when current is being preempted & rescheduled | 2157 | * preempt_notifier_register - tell me when current is being preempted & rescheduled |
2110 | * @notifier: notifier struct to register | 2158 | * @notifier: notifier struct to register |
2111 | */ | 2159 | */ |
2112 | void preempt_notifier_register(struct preempt_notifier *notifier) | 2160 | void preempt_notifier_register(struct preempt_notifier *notifier) |
2113 | { | 2161 | { |
2162 | static_key_slow_inc(&preempt_notifier_key); | ||
2114 | hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); | 2163 | hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); |
2115 | } | 2164 | } |
2116 | EXPORT_SYMBOL_GPL(preempt_notifier_register); | 2165 | EXPORT_SYMBOL_GPL(preempt_notifier_register); |
@@ -2119,15 +2168,16 @@ EXPORT_SYMBOL_GPL(preempt_notifier_register); | |||
2119 | * preempt_notifier_unregister - no longer interested in preemption notifications | 2168 | * preempt_notifier_unregister - no longer interested in preemption notifications |
2120 | * @notifier: notifier struct to unregister | 2169 | * @notifier: notifier struct to unregister |
2121 | * | 2170 | * |
2122 | * This is safe to call from within a preemption notifier. | 2171 | * This is *not* safe to call from within a preemption notifier. |
2123 | */ | 2172 | */ |
2124 | void preempt_notifier_unregister(struct preempt_notifier *notifier) | 2173 | void preempt_notifier_unregister(struct preempt_notifier *notifier) |
2125 | { | 2174 | { |
2126 | hlist_del(¬ifier->link); | 2175 | hlist_del(¬ifier->link); |
2176 | static_key_slow_dec(&preempt_notifier_key); | ||
2127 | } | 2177 | } |
2128 | EXPORT_SYMBOL_GPL(preempt_notifier_unregister); | 2178 | EXPORT_SYMBOL_GPL(preempt_notifier_unregister); |
2129 | 2179 | ||
2130 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 2180 | static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) |
2131 | { | 2181 | { |
2132 | struct preempt_notifier *notifier; | 2182 | struct preempt_notifier *notifier; |
2133 | 2183 | ||
@@ -2135,9 +2185,15 @@ static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | |||
2135 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); | 2185 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); |
2136 | } | 2186 | } |
2137 | 2187 | ||
2188 | static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) | ||
2189 | { | ||
2190 | if (static_key_false(&preempt_notifier_key)) | ||
2191 | __fire_sched_in_preempt_notifiers(curr); | ||
2192 | } | ||
2193 | |||
2138 | static void | 2194 | static void |
2139 | fire_sched_out_preempt_notifiers(struct task_struct *curr, | 2195 | __fire_sched_out_preempt_notifiers(struct task_struct *curr, |
2140 | struct task_struct *next) | 2196 | struct task_struct *next) |
2141 | { | 2197 | { |
2142 | struct preempt_notifier *notifier; | 2198 | struct preempt_notifier *notifier; |
2143 | 2199 | ||
@@ -2145,13 +2201,21 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2145 | notifier->ops->sched_out(notifier, next); | 2201 | notifier->ops->sched_out(notifier, next); |
2146 | } | 2202 | } |
2147 | 2203 | ||
2204 | static __always_inline void | ||
2205 | fire_sched_out_preempt_notifiers(struct task_struct *curr, | ||
2206 | struct task_struct *next) | ||
2207 | { | ||
2208 | if (static_key_false(&preempt_notifier_key)) | ||
2209 | __fire_sched_out_preempt_notifiers(curr, next); | ||
2210 | } | ||
2211 | |||
2148 | #else /* !CONFIG_PREEMPT_NOTIFIERS */ | 2212 | #else /* !CONFIG_PREEMPT_NOTIFIERS */ |
2149 | 2213 | ||
2150 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 2214 | static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
2151 | { | 2215 | { |
2152 | } | 2216 | } |
2153 | 2217 | ||
2154 | static void | 2218 | static inline void |
2155 | fire_sched_out_preempt_notifiers(struct task_struct *curr, | 2219 | fire_sched_out_preempt_notifiers(struct task_struct *curr, |
2156 | struct task_struct *next) | 2220 | struct task_struct *next) |
2157 | { | 2221 | { |
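With preempt_notifier_key in place, the fire_sched_{in,out}_preempt_notifiers() calls on every context switch collapse to a patched-out branch until something actually registers a notifier. A minimal, hypothetical registration for reference; the ops signatures mirror the calls above, the names are illustrative:

#include <linux/preempt.h>
#include <linux/sched.h>

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	/* current is being scheduled back in on 'cpu' */
}

static void my_sched_out(struct preempt_notifier *pn, struct task_struct *next)
{
	/* current is being preempted in favour of 'next' */
}

static struct preempt_notifier_ops my_ops = {
	.sched_in  = my_sched_in,
	.sched_out = my_sched_out,
};

static struct preempt_notifier my_notifier;

static void my_watch_current_task(void)
{
	/* Must run in the task that wants the callbacks: registration links
	 * into current->preempt_notifiers and bumps preempt_notifier_key. */
	preempt_notifier_init(&my_notifier, &my_ops);
	preempt_notifier_register(&my_notifier);
}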
@@ -2397,9 +2461,9 @@ unsigned long nr_iowait_cpu(int cpu) | |||
2397 | 2461 | ||
2398 | void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) | 2462 | void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) |
2399 | { | 2463 | { |
2400 | struct rq *this = this_rq(); | 2464 | struct rq *rq = this_rq(); |
2401 | *nr_waiters = atomic_read(&this->nr_iowait); | 2465 | *nr_waiters = atomic_read(&rq->nr_iowait); |
2402 | *load = this->cpu_load[0]; | 2466 | *load = rq->load.weight; |
2403 | } | 2467 | } |
2404 | 2468 | ||
2405 | #ifdef CONFIG_SMP | 2469 | #ifdef CONFIG_SMP |
@@ -2497,6 +2561,7 @@ void scheduler_tick(void) | |||
2497 | update_rq_clock(rq); | 2561 | update_rq_clock(rq); |
2498 | curr->sched_class->task_tick(rq, curr, 0); | 2562 | curr->sched_class->task_tick(rq, curr, 0); |
2499 | update_cpu_load_active(rq); | 2563 | update_cpu_load_active(rq); |
2564 | calc_global_load_tick(rq); | ||
2500 | raw_spin_unlock(&rq->lock); | 2565 | raw_spin_unlock(&rq->lock); |
2501 | 2566 | ||
2502 | perf_event_task_tick(); | 2567 | perf_event_task_tick(); |
@@ -2525,7 +2590,7 @@ void scheduler_tick(void) | |||
2525 | u64 scheduler_tick_max_deferment(void) | 2590 | u64 scheduler_tick_max_deferment(void) |
2526 | { | 2591 | { |
2527 | struct rq *rq = this_rq(); | 2592 | struct rq *rq = this_rq(); |
2528 | unsigned long next, now = ACCESS_ONCE(jiffies); | 2593 | unsigned long next, now = READ_ONCE(jiffies); |
2529 | 2594 | ||
2530 | next = rq->last_sched_tick + HZ; | 2595 | next = rq->last_sched_tick + HZ; |
2531 | 2596 | ||
@@ -2726,9 +2791,7 @@ again: | |||
2726 | * - return from syscall or exception to user-space | 2791 | * - return from syscall or exception to user-space |
2727 | * - return from interrupt-handler to user-space | 2792 | * - return from interrupt-handler to user-space |
2728 | * | 2793 | * |
2729 | * WARNING: all callers must re-check need_resched() afterward and reschedule | 2794 | * WARNING: must be called with preemption disabled! |
2730 | * accordingly in case an event triggered the need for rescheduling (such as | ||
2731 | * an interrupt waking up a task) while preemption was disabled in __schedule(). | ||
2732 | */ | 2795 | */ |
2733 | static void __sched __schedule(void) | 2796 | static void __sched __schedule(void) |
2734 | { | 2797 | { |
@@ -2737,7 +2800,6 @@ static void __sched __schedule(void) | |||
2737 | struct rq *rq; | 2800 | struct rq *rq; |
2738 | int cpu; | 2801 | int cpu; |
2739 | 2802 | ||
2740 | preempt_disable(); | ||
2741 | cpu = smp_processor_id(); | 2803 | cpu = smp_processor_id(); |
2742 | rq = cpu_rq(cpu); | 2804 | rq = cpu_rq(cpu); |
2743 | rcu_note_context_switch(); | 2805 | rcu_note_context_switch(); |
@@ -2801,8 +2863,6 @@ static void __sched __schedule(void) | |||
2801 | raw_spin_unlock_irq(&rq->lock); | 2863 | raw_spin_unlock_irq(&rq->lock); |
2802 | 2864 | ||
2803 | post_schedule(rq); | 2865 | post_schedule(rq); |
2804 | |||
2805 | sched_preempt_enable_no_resched(); | ||
2806 | } | 2866 | } |
2807 | 2867 | ||
2808 | static inline void sched_submit_work(struct task_struct *tsk) | 2868 | static inline void sched_submit_work(struct task_struct *tsk) |
@@ -2823,7 +2883,9 @@ asmlinkage __visible void __sched schedule(void) | |||
2823 | 2883 | ||
2824 | sched_submit_work(tsk); | 2884 | sched_submit_work(tsk); |
2825 | do { | 2885 | do { |
2886 | preempt_disable(); | ||
2826 | __schedule(); | 2887 | __schedule(); |
2888 | sched_preempt_enable_no_resched(); | ||
2827 | } while (need_resched()); | 2889 | } while (need_resched()); |
2828 | } | 2890 | } |
2829 | EXPORT_SYMBOL(schedule); | 2891 | EXPORT_SYMBOL(schedule); |
@@ -2862,15 +2924,14 @@ void __sched schedule_preempt_disabled(void) | |||
2862 | static void __sched notrace preempt_schedule_common(void) | 2924 | static void __sched notrace preempt_schedule_common(void) |
2863 | { | 2925 | { |
2864 | do { | 2926 | do { |
2865 | __preempt_count_add(PREEMPT_ACTIVE); | 2927 | preempt_active_enter(); |
2866 | __schedule(); | 2928 | __schedule(); |
2867 | __preempt_count_sub(PREEMPT_ACTIVE); | 2929 | preempt_active_exit(); |
2868 | 2930 | ||
2869 | /* | 2931 | /* |
2870 | * Check again in case we missed a preemption opportunity | 2932 | * Check again in case we missed a preemption opportunity |
2871 | * between schedule and now. | 2933 | * between schedule and now. |
2872 | */ | 2934 | */ |
2873 | barrier(); | ||
2874 | } while (need_resched()); | 2935 | } while (need_resched()); |
2875 | } | 2936 | } |
2876 | 2937 | ||
@@ -2894,9 +2955,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |||
2894 | NOKPROBE_SYMBOL(preempt_schedule); | 2955 | NOKPROBE_SYMBOL(preempt_schedule); |
2895 | EXPORT_SYMBOL(preempt_schedule); | 2956 | EXPORT_SYMBOL(preempt_schedule); |
2896 | 2957 | ||
2897 | #ifdef CONFIG_CONTEXT_TRACKING | ||
2898 | /** | 2958 | /** |
2899 | * preempt_schedule_context - preempt_schedule called by tracing | 2959 | * preempt_schedule_notrace - preempt_schedule called by tracing |
2900 | * | 2960 | * |
2901 | * The tracing infrastructure uses preempt_enable_notrace to prevent | 2961 | * The tracing infrastructure uses preempt_enable_notrace to prevent |
2902 | * recursion and tracing preempt enabling caused by the tracing | 2962 | * recursion and tracing preempt enabling caused by the tracing |
@@ -2909,7 +2969,7 @@ EXPORT_SYMBOL(preempt_schedule); | |||
2909 | * instead of preempt_schedule() to exit user context if needed before | 2969 | * instead of preempt_schedule() to exit user context if needed before |
2910 | * calling the scheduler. | 2970 | * calling the scheduler. |
2911 | */ | 2971 | */ |
2912 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) | 2972 | asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
2913 | { | 2973 | { |
2914 | enum ctx_state prev_ctx; | 2974 | enum ctx_state prev_ctx; |
2915 | 2975 | ||
@@ -2917,7 +2977,13 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void) | |||
2917 | return; | 2977 | return; |
2918 | 2978 | ||
2919 | do { | 2979 | do { |
2920 | __preempt_count_add(PREEMPT_ACTIVE); | 2980 | /* |
2981 | * Use raw __preempt_count() ops that don't call functions. | ||
2982 | * We can't call functions before disabling preemption which | ||
2983 | * disarm preemption tracing recursions. | ||
2984 | */ | ||
2985 | __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); | ||
2986 | barrier(); | ||
2921 | /* | 2987 | /* |
2922 | * Needs preempt disabled in case user_exit() is traced | 2988 | * Needs preempt disabled in case user_exit() is traced |
2923 | * and the tracer calls preempt_enable_notrace() causing | 2989 | * and the tracer calls preempt_enable_notrace() causing |
@@ -2927,12 +2993,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void) | |||
2927 | __schedule(); | 2993 | __schedule(); |
2928 | exception_exit(prev_ctx); | 2994 | exception_exit(prev_ctx); |
2929 | 2995 | ||
2930 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
2931 | barrier(); | 2996 | barrier(); |
2997 | __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); | ||
2932 | } while (need_resched()); | 2998 | } while (need_resched()); |
2933 | } | 2999 | } |
2934 | EXPORT_SYMBOL_GPL(preempt_schedule_context); | 3000 | EXPORT_SYMBOL_GPL(preempt_schedule_notrace); |
2935 | #endif /* CONFIG_CONTEXT_TRACKING */ | ||
2936 | 3001 | ||
2937 | #endif /* CONFIG_PREEMPT */ | 3002 | #endif /* CONFIG_PREEMPT */ |
2938 | 3003 | ||
@@ -2952,17 +3017,11 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) | |||
2952 | prev_state = exception_enter(); | 3017 | prev_state = exception_enter(); |
2953 | 3018 | ||
2954 | do { | 3019 | do { |
2955 | __preempt_count_add(PREEMPT_ACTIVE); | 3020 | preempt_active_enter(); |
2956 | local_irq_enable(); | 3021 | local_irq_enable(); |
2957 | __schedule(); | 3022 | __schedule(); |
2958 | local_irq_disable(); | 3023 | local_irq_disable(); |
2959 | __preempt_count_sub(PREEMPT_ACTIVE); | 3024 | preempt_active_exit(); |
2960 | |||
2961 | /* | ||
2962 | * Check again in case we missed a preemption opportunity | ||
2963 | * between schedule and now. | ||
2964 | */ | ||
2965 | barrier(); | ||
2966 | } while (need_resched()); | 3025 | } while (need_resched()); |
2967 | 3026 | ||
2968 | exception_exit(prev_state); | 3027 | exception_exit(prev_state); |
@@ -3040,7 +3099,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
3040 | if (!dl_prio(p->normal_prio) || | 3099 | if (!dl_prio(p->normal_prio) || |
3041 | (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { | 3100 | (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { |
3042 | p->dl.dl_boosted = 1; | 3101 | p->dl.dl_boosted = 1; |
3043 | p->dl.dl_throttled = 0; | ||
3044 | enqueue_flag = ENQUEUE_REPLENISH; | 3102 | enqueue_flag = ENQUEUE_REPLENISH; |
3045 | } else | 3103 | } else |
3046 | p->dl.dl_boosted = 0; | 3104 | p->dl.dl_boosted = 0; |
@@ -5314,7 +5372,7 @@ static struct notifier_block migration_notifier = { | |||
5314 | .priority = CPU_PRI_MIGRATION, | 5372 | .priority = CPU_PRI_MIGRATION, |
5315 | }; | 5373 | }; |
5316 | 5374 | ||
5317 | static void __cpuinit set_cpu_rq_start_time(void) | 5375 | static void set_cpu_rq_start_time(void) |
5318 | { | 5376 | { |
5319 | int cpu = smp_processor_id(); | 5377 | int cpu = smp_processor_id(); |
5320 | struct rq *rq = cpu_rq(cpu); | 5378 | struct rq *rq = cpu_rq(cpu); |
@@ -7734,11 +7792,11 @@ static long sched_group_rt_runtime(struct task_group *tg) | |||
7734 | return rt_runtime_us; | 7792 | return rt_runtime_us; |
7735 | } | 7793 | } |
7736 | 7794 | ||
7737 | static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | 7795 | static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us) |
7738 | { | 7796 | { |
7739 | u64 rt_runtime, rt_period; | 7797 | u64 rt_runtime, rt_period; |
7740 | 7798 | ||
7741 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | 7799 | rt_period = rt_period_us * NSEC_PER_USEC; |
7742 | rt_runtime = tg->rt_bandwidth.rt_runtime; | 7800 | rt_runtime = tg->rt_bandwidth.rt_runtime; |
7743 | 7801 | ||
7744 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 7802 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8394b1ee600c..f5a64ffad176 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -567,7 +567,7 @@ static void cputime_advance(cputime_t *counter, cputime_t new) | |||
567 | { | 567 | { |
568 | cputime_t old; | 568 | cputime_t old; |
569 | 569 | ||
570 | while (new > (old = ACCESS_ONCE(*counter))) | 570 | while (new > (old = READ_ONCE(*counter))) |
571 | cmpxchg_cputime(counter, old, new); | 571 | cmpxchg_cputime(counter, old, new); |
572 | } | 572 | } |
573 | 573 | ||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5e95145088fd..392e8fb94db3 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -640,7 +640,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |||
640 | } | 640 | } |
641 | 641 | ||
642 | static | 642 | static |
643 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | 643 | int dl_runtime_exceeded(struct sched_dl_entity *dl_se) |
644 | { | 644 | { |
645 | return (dl_se->runtime <= 0); | 645 | return (dl_se->runtime <= 0); |
646 | } | 646 | } |
@@ -684,7 +684,7 @@ static void update_curr_dl(struct rq *rq) | |||
684 | sched_rt_avg_update(rq, delta_exec); | 684 | sched_rt_avg_update(rq, delta_exec); |
685 | 685 | ||
686 | dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; | 686 | dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; |
687 | if (dl_runtime_exceeded(rq, dl_se)) { | 687 | if (dl_runtime_exceeded(dl_se)) { |
688 | dl_se->dl_throttled = 1; | 688 | dl_se->dl_throttled = 1; |
689 | __dequeue_task_dl(rq, curr, 0); | 689 | __dequeue_task_dl(rq, curr, 0); |
690 | if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted))) | 690 | if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted))) |
@@ -995,7 +995,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
995 | rq = cpu_rq(cpu); | 995 | rq = cpu_rq(cpu); |
996 | 996 | ||
997 | rcu_read_lock(); | 997 | rcu_read_lock(); |
998 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | 998 | curr = READ_ONCE(rq->curr); /* unlocked access */ |
999 | 999 | ||
1000 | /* | 1000 | /* |
1001 | * If we are dealing with a -deadline task, we must | 1001 | * If we are dealing with a -deadline task, we must |
@@ -1012,7 +1012,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
1012 | (p->nr_cpus_allowed > 1)) { | 1012 | (p->nr_cpus_allowed > 1)) { |
1013 | int target = find_later_rq(p); | 1013 | int target = find_later_rq(p); |
1014 | 1014 | ||
1015 | if (target != -1) | 1015 | if (target != -1 && |
1016 | dl_time_before(p->dl.deadline, | ||
1017 | cpu_rq(target)->dl.earliest_dl.curr)) | ||
1016 | cpu = target; | 1018 | cpu = target; |
1017 | } | 1019 | } |
1018 | rcu_read_unlock(); | 1020 | rcu_read_unlock(); |
@@ -1230,6 +1232,32 @@ next_node: | |||
1230 | return NULL; | 1232 | return NULL; |
1231 | } | 1233 | } |
1232 | 1234 | ||
1235 | /* | ||
1236 | * Return the earliest pushable rq's task, which is suitable to be executed | ||
1237 | * on the CPU, NULL otherwise: | ||
1238 | */ | ||
1239 | static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu) | ||
1240 | { | ||
1241 | struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost; | ||
1242 | struct task_struct *p = NULL; | ||
1243 | |||
1244 | if (!has_pushable_dl_tasks(rq)) | ||
1245 | return NULL; | ||
1246 | |||
1247 | next_node: | ||
1248 | if (next_node) { | ||
1249 | p = rb_entry(next_node, struct task_struct, pushable_dl_tasks); | ||
1250 | |||
1251 | if (pick_dl_task(rq, p, cpu)) | ||
1252 | return p; | ||
1253 | |||
1254 | next_node = rb_next(next_node); | ||
1255 | goto next_node; | ||
1256 | } | ||
1257 | |||
1258 | return NULL; | ||
1259 | } | ||
1260 | |||
1233 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); | 1261 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); |
1234 | 1262 | ||
1235 | static int find_later_rq(struct task_struct *task) | 1263 | static int find_later_rq(struct task_struct *task) |
@@ -1333,6 +1361,17 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | |||
1333 | 1361 | ||
1334 | later_rq = cpu_rq(cpu); | 1362 | later_rq = cpu_rq(cpu); |
1335 | 1363 | ||
1364 | if (!dl_time_before(task->dl.deadline, | ||
1365 | later_rq->dl.earliest_dl.curr)) { | ||
1366 | /* | ||
1367 | * Target rq has tasks of equal or earlier deadline, | ||
1368 | * retrying does not release any lock and is unlikely | ||
1369 | * to yield a different result. | ||
1370 | */ | ||
1371 | later_rq = NULL; | ||
1372 | break; | ||
1373 | } | ||
1374 | |||
1336 | /* Retry if something changed. */ | 1375 | /* Retry if something changed. */ |
1337 | if (double_lock_balance(rq, later_rq)) { | 1376 | if (double_lock_balance(rq, later_rq)) { |
1338 | if (unlikely(task_rq(task) != rq || | 1377 | if (unlikely(task_rq(task) != rq || |
@@ -1514,7 +1553,7 @@ static int pull_dl_task(struct rq *this_rq) | |||
1514 | if (src_rq->dl.dl_nr_running <= 1) | 1553 | if (src_rq->dl.dl_nr_running <= 1) |
1515 | goto skip; | 1554 | goto skip; |
1516 | 1555 | ||
1517 | p = pick_next_earliest_dl_task(src_rq, this_cpu); | 1556 | p = pick_earliest_pushable_dl_task(src_rq, this_cpu); |
1518 | 1557 | ||
1519 | /* | 1558 | /* |
1520 | * We found a task to be pulled if: | 1559 | * We found a task to be pulled if: |
@@ -1659,7 +1698,7 @@ static void rq_offline_dl(struct rq *rq) | |||
1659 | cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); | 1698 | cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); |
1660 | } | 1699 | } |
1661 | 1700 | ||
1662 | void init_sched_dl_class(void) | 1701 | void __init init_sched_dl_class(void) |
1663 | { | 1702 | { |
1664 | unsigned int i; | 1703 | unsigned int i; |
1665 | 1704 | ||
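The two checks added above (in select_task_rq_dl() and find_lock_later_rq()) apply the same test: a candidate runqueue is only worth pushing to if the task's deadline is earlier than that runqueue's current earliest deadline; otherwise the task would not run there any sooner and the push, or the lock retry, is pointless. dl_time_before() is, in essence, the deadline scheduler's wrap-safe u64 comparison; sketched here together with the test in isolation (push_would_help() is a hypothetical helper name):

/* "a is earlier than b", tolerant of u64 wrap-around. */
static inline int dl_time_before(u64 a, u64 b)
{
	return (s64)(a - b) < 0;
}

static bool push_would_help(struct task_struct *p, struct rq *later_rq)
{
	/* Equal or later deadline than everything queued there: no point. */
	return dl_time_before(p->dl.deadline, later_rq->dl.earliest_dl.curr);
}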
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index a245c1fc6f0a..704683cc9042 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -132,12 +132,14 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
132 | p->prio); | 132 | p->prio); |
133 | #ifdef CONFIG_SCHEDSTATS | 133 | #ifdef CONFIG_SCHEDSTATS |
134 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", | 134 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", |
135 | SPLIT_NS(p->se.vruntime), | 135 | SPLIT_NS(p->se.statistics.wait_sum), |
136 | SPLIT_NS(p->se.sum_exec_runtime), | 136 | SPLIT_NS(p->se.sum_exec_runtime), |
137 | SPLIT_NS(p->se.statistics.sum_sleep_runtime)); | 137 | SPLIT_NS(p->se.statistics.sum_sleep_runtime)); |
138 | #else | 138 | #else |
139 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", | 139 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", |
140 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 140 | 0LL, 0L, |
141 | SPLIT_NS(p->se.sum_exec_runtime), | ||
142 | 0LL, 0L); | ||
141 | #endif | 143 | #endif |
142 | #ifdef CONFIG_NUMA_BALANCING | 144 | #ifdef CONFIG_NUMA_BALANCING |
143 | SEQ_printf(m, " %d", task_node(p)); | 145 | SEQ_printf(m, " %d", task_node(p)); |
@@ -156,7 +158,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
156 | SEQ_printf(m, | 158 | SEQ_printf(m, |
157 | "\nrunnable tasks:\n" | 159 | "\nrunnable tasks:\n" |
158 | " task PID tree-key switches prio" | 160 | " task PID tree-key switches prio" |
159 | " exec-runtime sum-exec sum-sleep\n" | 161 | " wait-time sum-exec sum-sleep\n" |
160 | "------------------------------------------------------" | 162 | "------------------------------------------------------" |
161 | "----------------------------------------------------\n"); | 163 | "----------------------------------------------------\n"); |
162 | 164 | ||
@@ -582,6 +584,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
582 | nr_switches = p->nvcsw + p->nivcsw; | 584 | nr_switches = p->nvcsw + p->nivcsw; |
583 | 585 | ||
584 | #ifdef CONFIG_SCHEDSTATS | 586 | #ifdef CONFIG_SCHEDSTATS |
587 | PN(se.statistics.sum_sleep_runtime); | ||
585 | PN(se.statistics.wait_start); | 588 | PN(se.statistics.wait_start); |
586 | PN(se.statistics.sleep_start); | 589 | PN(se.statistics.sleep_start); |
587 | PN(se.statistics.block_start); | 590 | PN(se.statistics.block_start); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2980e8733bc..433061d984ea 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -141,9 +141,9 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) | |||
141 | * | 141 | * |
142 | * This idea comes from the SD scheduler of Con Kolivas: | 142 | * This idea comes from the SD scheduler of Con Kolivas: |
143 | */ | 143 | */ |
144 | static int get_update_sysctl_factor(void) | 144 | static unsigned int get_update_sysctl_factor(void) |
145 | { | 145 | { |
146 | unsigned int cpus = min_t(int, num_online_cpus(), 8); | 146 | unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); |
147 | unsigned int factor; | 147 | unsigned int factor; |
148 | 148 | ||
149 | switch (sysctl_sched_tunable_scaling) { | 149 | switch (sysctl_sched_tunable_scaling) { |
@@ -576,7 +576,7 @@ int sched_proc_update_handler(struct ctl_table *table, int write, | |||
576 | loff_t *ppos) | 576 | loff_t *ppos) |
577 | { | 577 | { |
578 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 578 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
579 | int factor = get_update_sysctl_factor(); | 579 | unsigned int factor = get_update_sysctl_factor(); |
580 | 580 | ||
581 | if (ret || !write) | 581 | if (ret || !write) |
582 | return ret; | 582 | return ret; |
@@ -834,7 +834,7 @@ static unsigned int task_nr_scan_windows(struct task_struct *p) | |||
834 | 834 | ||
835 | static unsigned int task_scan_min(struct task_struct *p) | 835 | static unsigned int task_scan_min(struct task_struct *p) |
836 | { | 836 | { |
837 | unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size); | 837 | unsigned int scan_size = READ_ONCE(sysctl_numa_balancing_scan_size); |
838 | unsigned int scan, floor; | 838 | unsigned int scan, floor; |
839 | unsigned int windows = 1; | 839 | unsigned int windows = 1; |
840 | 840 | ||
@@ -1198,11 +1198,9 @@ static void task_numa_assign(struct task_numa_env *env, | |||
1198 | static bool load_too_imbalanced(long src_load, long dst_load, | 1198 | static bool load_too_imbalanced(long src_load, long dst_load, |
1199 | struct task_numa_env *env) | 1199 | struct task_numa_env *env) |
1200 | { | 1200 | { |
1201 | long imb, old_imb; | ||
1202 | long orig_src_load, orig_dst_load; | ||
1201 | long src_capacity, dst_capacity; | 1203 | long src_capacity, dst_capacity; |
1202 | long orig_src_load; | ||
1203 | long load_a, load_b; | ||
1204 | long moved_load; | ||
1205 | long imb; | ||
1206 | 1204 | ||
1207 | /* | 1205 | /* |
1208 | * The load is corrected for the CPU capacity available on each node. | 1206 | * The load is corrected for the CPU capacity available on each node. |
@@ -1215,39 +1213,30 @@ static bool load_too_imbalanced(long src_load, long dst_load, | |||
1215 | dst_capacity = env->dst_stats.compute_capacity; | 1213 | dst_capacity = env->dst_stats.compute_capacity; |
1216 | 1214 | ||
1217 | /* We care about the slope of the imbalance, not the direction. */ | 1215 | /* We care about the slope of the imbalance, not the direction. */ |
1218 | load_a = dst_load; | 1216 | if (dst_load < src_load) |
1219 | load_b = src_load; | 1217 | swap(dst_load, src_load); |
1220 | if (load_a < load_b) | ||
1221 | swap(load_a, load_b); | ||
1222 | 1218 | ||
1223 | /* Is the difference below the threshold? */ | 1219 | /* Is the difference below the threshold? */ |
1224 | imb = load_a * src_capacity * 100 - | 1220 | imb = dst_load * src_capacity * 100 - |
1225 | load_b * dst_capacity * env->imbalance_pct; | 1221 | src_load * dst_capacity * env->imbalance_pct; |
1226 | if (imb <= 0) | 1222 | if (imb <= 0) |
1227 | return false; | 1223 | return false; |
1228 | 1224 | ||
1229 | /* | 1225 | /* |
1230 | * The imbalance is above the allowed threshold. | 1226 | * The imbalance is above the allowed threshold. |
1231 | * Allow a move that brings us closer to a balanced situation, | 1227 | * Compare it with the old imbalance. |
1232 | * without moving things past the point of balance. | ||
1233 | */ | 1228 | */ |
1234 | orig_src_load = env->src_stats.load; | 1229 | orig_src_load = env->src_stats.load; |
1230 | orig_dst_load = env->dst_stats.load; | ||
1235 | 1231 | ||
1236 | /* | 1232 | if (orig_dst_load < orig_src_load) |
1237 | * In a task swap, there will be one load moving from src to dst, | 1233 | swap(orig_dst_load, orig_src_load); |
1238 | * and another moving back. This is the net sum of both moves. | ||
1239 | * A simple task move will always have a positive value. | ||
1240 | * Allow the move if it brings the system closer to a balanced | ||
1241 | * situation, without crossing over the balance point. | ||
1242 | */ | ||
1243 | moved_load = orig_src_load - src_load; | ||
1244 | 1234 | ||
1245 | if (moved_load > 0) | 1235 | old_imb = orig_dst_load * src_capacity * 100 - |
1246 | /* Moving src -> dst. Did we overshoot balance? */ | 1236 | orig_src_load * dst_capacity * env->imbalance_pct; |
1247 | return src_load * dst_capacity < dst_load * src_capacity; | 1237 | |
1248 | else | 1238 | /* Would this change make things worse? */ |
1249 | /* Moving dst -> src. Did we overshoot balance? */ | 1239 | return (imb > old_imb); |
1250 | return dst_load * src_capacity < src_load * dst_capacity; | ||
1251 | } | 1240 | } |
1252 | 1241 | ||
1253 | /* | 1242 | /* |
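The reverted test above can be sanity-checked with a standalone re-derivation; the numbers and the imbalance_pct of 112 below are assumptions for illustration only:

#include <stdio.h>

static void swap_long(long *a, long *b) { long t = *a; *a = *b; *b = t; }

/* Mirrors load_too_imbalanced(): non-zero means "reject the move". */
static int too_imbalanced(long src_load, long dst_load,
			  long orig_src_load, long orig_dst_load,
			  long src_cap, long dst_cap, long pct)
{
	long imb, old_imb;

	if (dst_load < src_load)
		swap_long(&dst_load, &src_load);
	imb = dst_load * src_cap * 100 - src_load * dst_cap * pct;
	if (imb <= 0)
		return 0;			/* below the threshold */

	if (orig_dst_load < orig_src_load)
		swap_long(&orig_dst_load, &orig_src_load);
	old_imb = orig_dst_load * src_cap * 100 - orig_src_load * dst_cap * pct;

	return imb > old_imb;			/* only reject if it got worse */
}

int main(void)
{
	/* Equal capacities, original split 400/200: a swap leaving 300/300
	 * is accepted, one leaving 100/500 is rejected as more imbalanced. */
	printf("%d\n", too_imbalanced(300, 300, 400, 200, 1024, 1024, 112)); /* 0 */
	printf("%d\n", too_imbalanced(100, 500, 400, 200, 1024, 1024, 112)); /* 1 */
	return 0;
}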
@@ -1409,6 +1398,30 @@ static void task_numa_find_cpu(struct task_numa_env *env, | |||
1409 | } | 1398 | } |
1410 | } | 1399 | } |
1411 | 1400 | ||
1401 | /* Only move tasks to a NUMA node less busy than the current node. */ | ||
1402 | static bool numa_has_capacity(struct task_numa_env *env) | ||
1403 | { | ||
1404 | struct numa_stats *src = &env->src_stats; | ||
1405 | struct numa_stats *dst = &env->dst_stats; | ||
1406 | |||
1407 | if (src->has_free_capacity && !dst->has_free_capacity) | ||
1408 | return false; | ||
1409 | |||
1410 | /* | ||
1411 | * Only consider a task move if the source has a higher load | ||
1412 | * than the destination, corrected for CPU capacity on each node. | ||
1413 | * | ||
1414 | * src->load dst->load | ||
1415 | * --------------------- vs --------------------- | ||
1416 | * src->compute_capacity dst->compute_capacity | ||
1417 | */ | ||
1418 | if (src->load * dst->compute_capacity > | ||
1419 | dst->load * src->compute_capacity) | ||
1420 | return true; | ||
1421 | |||
1422 | return false; | ||
1423 | } | ||
1424 | |||
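Worked example for the capacity check above, with illustrative numbers: src->load = 300 on src->compute_capacity = 1024 versus dst->load = 200 on dst->compute_capacity = 2048. The cross-multiplied test compares 300 * 2048 = 614400 with 200 * 1024 = 204800; the first is larger, so the source node is the busier one per unit of capacity (roughly 0.29 against 0.10) and task_numa_find_cpu() goes on to look for a destination CPU. Cross-multiplying keeps the whole comparison in integer arithmetic and avoids dividing by the compute capacities.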
1412 | static int task_numa_migrate(struct task_struct *p) | 1425 | static int task_numa_migrate(struct task_struct *p) |
1413 | { | 1426 | { |
1414 | struct task_numa_env env = { | 1427 | struct task_numa_env env = { |
@@ -1463,7 +1476,8 @@ static int task_numa_migrate(struct task_struct *p) | |||
1463 | update_numa_stats(&env.dst_stats, env.dst_nid); | 1476 | update_numa_stats(&env.dst_stats, env.dst_nid); |
1464 | 1477 | ||
1465 | /* Try to find a spot on the preferred nid. */ | 1478 | /* Try to find a spot on the preferred nid. */ |
1466 | task_numa_find_cpu(&env, taskimp, groupimp); | 1479 | if (numa_has_capacity(&env)) |
1480 | task_numa_find_cpu(&env, taskimp, groupimp); | ||
1467 | 1481 | ||
1468 | /* | 1482 | /* |
1469 | * Look at other nodes in these cases: | 1483 | * Look at other nodes in these cases: |
@@ -1494,7 +1508,8 @@ static int task_numa_migrate(struct task_struct *p) | |||
1494 | env.dist = dist; | 1508 | env.dist = dist; |
1495 | env.dst_nid = nid; | 1509 | env.dst_nid = nid; |
1496 | update_numa_stats(&env.dst_stats, env.dst_nid); | 1510 | update_numa_stats(&env.dst_stats, env.dst_nid); |
1497 | task_numa_find_cpu(&env, taskimp, groupimp); | 1511 | if (numa_has_capacity(&env)) |
1512 | task_numa_find_cpu(&env, taskimp, groupimp); | ||
1498 | } | 1513 | } |
1499 | } | 1514 | } |
1500 | 1515 | ||
@@ -1794,7 +1809,12 @@ static void task_numa_placement(struct task_struct *p) | |||
1794 | u64 runtime, period; | 1809 | u64 runtime, period; |
1795 | spinlock_t *group_lock = NULL; | 1810 | spinlock_t *group_lock = NULL; |
1796 | 1811 | ||
1797 | seq = ACCESS_ONCE(p->mm->numa_scan_seq); | 1812 | /* |
1813 | * The p->mm->numa_scan_seq field gets updated without | ||
1814 | * exclusive access. Use READ_ONCE() here to ensure | ||
1815 | * that the field is read in a single access: | ||
1816 | */ | ||
1817 | seq = READ_ONCE(p->mm->numa_scan_seq); | ||
1798 | if (p->numa_scan_seq == seq) | 1818 | if (p->numa_scan_seq == seq) |
1799 | return; | 1819 | return; |
1800 | p->numa_scan_seq = seq; | 1820 | p->numa_scan_seq = seq; |
@@ -1938,7 +1958,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1938 | } | 1958 | } |
1939 | 1959 | ||
1940 | rcu_read_lock(); | 1960 | rcu_read_lock(); |
1941 | tsk = ACCESS_ONCE(cpu_rq(cpu)->curr); | 1961 | tsk = READ_ONCE(cpu_rq(cpu)->curr); |
1942 | 1962 | ||
1943 | if (!cpupid_match_pid(tsk, cpupid)) | 1963 | if (!cpupid_match_pid(tsk, cpupid)) |
1944 | goto no_join; | 1964 | goto no_join; |
@@ -2107,7 +2127,15 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
2107 | 2127 | ||
2108 | static void reset_ptenuma_scan(struct task_struct *p) | 2128 | static void reset_ptenuma_scan(struct task_struct *p) |
2109 | { | 2129 | { |
2110 | ACCESS_ONCE(p->mm->numa_scan_seq)++; | 2130 | /* |
2131 | * We only did a read acquisition of the mmap sem, so | ||
2132 | * p->mm->numa_scan_seq is written to without exclusive access | ||
2133 | * and the update is not guaranteed to be atomic. That's not | ||
2134 | * much of an issue though, since this is just used for | ||
2135 | * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not | ||
2136 | * expensive, to avoid any form of compiler optimizations: | ||
2137 | */ | ||
2138 | WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1); | ||
2111 | p->mm->numa_scan_offset = 0; | 2139 | p->mm->numa_scan_offset = 0; |
2112 | } | 2140 | } |
2113 | 2141 | ||
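Both numa_scan_seq hunks above swap ACCESS_ONCE() for READ_ONCE()/WRITE_ONCE(): the counter is still updated without a lock (a racing increment can be lost, which is acceptable for a statistical sequence number), but each load and store is forced to be a single access the compiler cannot split, merge, or re-fetch. A userspace sketch of the same idiom; the two macros below are local approximations defined here for illustration, not the kernel's implementations:

    #include <stdio.h>

    /*
     * Local approximations: go through a volatile-qualified lvalue so the
     * compiler emits exactly one load or one store and cannot re-fetch,
     * cache, or merge the access (aligned word-sized objects also avoid
     * tearing on the usual architectures).
     */
    #define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
    #define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

    static int numa_scan_seq;   /* updated without exclusive access */

    static void reset_scan(void)
    {
        /* Not atomic: concurrent callers may lose an increment, but no
         * individual access is optimized away or torn. */
        WRITE_ONCE(numa_scan_seq, READ_ONCE(numa_scan_seq) + 1);
    }

    int main(void)
    {
        reset_scan();
        reset_scan();
        printf("numa_scan_seq = %d\n", READ_ONCE(numa_scan_seq));
        return 0;
    }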
@@ -4323,6 +4351,189 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
4323 | } | 4351 | } |
4324 | 4352 | ||
4325 | #ifdef CONFIG_SMP | 4353 | #ifdef CONFIG_SMP |
4354 | |||
4355 | /* | ||
4356 | * per rq 'load' array crap; XXX kill this. | ||
4357 | */ | ||
4358 | |||
4359 | /* | ||
4360 | * The exact cpuload at various idx values, calculated at every tick would be | ||
4361 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load | ||
4362 | * | ||
4363 | * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called | ||
4364 | * on nth tick when cpu may be busy, then we have: | ||
4365 | * load = ((2^idx - 1) / 2^idx)^(n-1) * load | ||
4366 | * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load | ||
4367 | * | ||
4368 | * decay_load_missed() below does efficient calculation of | ||
4369 | * load = ((2^idx - 1) / 2^idx)^(n-1) * load | ||
4370 | * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load | ||
4371 | * | ||
4372 | * The calculation is approximated on a 128 point scale. | ||
4373 | * degrade_zero_ticks is the number of ticks after which load at any | ||
4374 | * particular idx is approximated to be zero. | ||
4375 | * degrade_factor is a precomputed table, a row for each load idx. | ||
4376 | * Each column corresponds to degradation factor for a power of two ticks, | ||
4377 | * based on 128 point scale. | ||
4378 | * Example: | ||
4379 | * row 2, col 3 (=12) says that the degradation at load idx 2 after | ||
4380 | * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8). | ||
4381 | * | ||
4382 | * With these power-of-2 load factors, we can degrade the load n times | ||
4383 | * by looking at 1 bits in n and doing as many mult/shift instead of | ||
4384 | * n mult/shifts needed by the exact degradation. | ||
4385 | */ | ||
4386 | #define DEGRADE_SHIFT 7 | ||
4387 | static const unsigned char | ||
4388 | degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128}; | ||
4389 | static const unsigned char | ||
4390 | degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = { | ||
4391 | {0, 0, 0, 0, 0, 0, 0, 0}, | ||
4392 | {64, 32, 8, 0, 0, 0, 0, 0}, | ||
4393 | {96, 72, 40, 12, 1, 0, 0}, | ||
4394 | {112, 98, 75, 43, 15, 1, 0}, | ||
4395 | {120, 112, 98, 76, 45, 16, 2} }; | ||
4396 | |||
4397 | /* | ||
4398 | * Update cpu_load for any missed ticks, due to tickless idle. The backlog | ||
4399 | * would be when CPU is idle and so we just decay the old load without | ||
4400 | * adding any new load. | ||
4401 | */ | ||
4402 | static unsigned long | ||
4403 | decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) | ||
4404 | { | ||
4405 | int j = 0; | ||
4406 | |||
4407 | if (!missed_updates) | ||
4408 | return load; | ||
4409 | |||
4410 | if (missed_updates >= degrade_zero_ticks[idx]) | ||
4411 | return 0; | ||
4412 | |||
4413 | if (idx == 1) | ||
4414 | return load >> missed_updates; | ||
4415 | |||
4416 | while (missed_updates) { | ||
4417 | if (missed_updates % 2) | ||
4418 | load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT; | ||
4419 | |||
4420 | missed_updates >>= 1; | ||
4421 | j++; | ||
4422 | } | ||
4423 | return load; | ||
4424 | } | ||
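The table-driven decay above applies (2^idx - 1)/2^idx once per missed tick, but in only O(log n) multiply/shift steps, by walking the set bits of the tick count against the precomputed 128-point factors. A standalone sketch that reuses the same tables (with the implicit trailing zeros written out) and compares the result against a naive tick-by-tick decay, to make the documented approximation visible:

    #include <stdio.h>

    #define CPU_LOAD_IDX_MAX 5
    #define DEGRADE_SHIFT    7

    static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] =
        { 0, 8, 32, 64, 128 };

    static const unsigned char
    degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
        {   0,   0,  0,  0,  0,  0, 0, 0 },
        {  64,  32,  8,  0,  0,  0, 0, 0 },
        {  96,  72, 40, 12,  1,  0, 0, 0 },
        { 112,  98, 75, 43, 15,  1, 0, 0 },
        { 120, 112, 98, 76, 45, 16, 2, 0 },
    };

    /* Same logic as the helper above: decay 'load' for 'missed' ticks at 'idx'. */
    static unsigned long decay_load_missed(unsigned long load,
                                           unsigned long missed, int idx)
    {
        int j = 0;

        if (!missed)
            return load;
        if (missed >= degrade_zero_ticks[idx])
            return 0;
        if (idx == 1)
            return load >> missed;

        while (missed) {
            if (missed % 2)
                load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
            missed >>= 1;
            j++;
        }
        return load;
    }

    /* Naive reference: apply (2^idx - 1)/2^idx once per missed tick. */
    static unsigned long decay_load_naive(unsigned long load,
                                          unsigned long missed, int idx)
    {
        while (missed--)
            load -= load >> idx;
        return load;
    }

    int main(void)
    {
        for (unsigned long missed = 1; missed <= 16; missed <<= 1)
            printf("idx=2 missed=%2lu table=%4lu naive=%4lu\n", missed,
                   decay_load_missed(1000, missed, 2),
                   decay_load_naive(1000, missed, 2));
        return 0;
    }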
4425 | |||
4426 | /* | ||
4427 | * Update rq->cpu_load[] statistics. This function is usually called every | ||
4428 | * scheduler tick (TICK_NSEC). With tickless idle this will not be called | ||
4429 | * every tick. We fix it up based on jiffies. | ||
4430 | */ | ||
4431 | static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, | ||
4432 | unsigned long pending_updates) | ||
4433 | { | ||
4434 | int i, scale; | ||
4435 | |||
4436 | this_rq->nr_load_updates++; | ||
4437 | |||
4438 | /* Update our load: */ | ||
4439 | this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ | ||
4440 | for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { | ||
4441 | unsigned long old_load, new_load; | ||
4442 | |||
4443 | /* scale is effectively 1 << i now, and >> i divides by scale */ | ||
4444 | |||
4445 | old_load = this_rq->cpu_load[i]; | ||
4446 | old_load = decay_load_missed(old_load, pending_updates - 1, i); | ||
4447 | new_load = this_load; | ||
4448 | /* | ||
4449 | * Round up the averaging division if load is increasing. This | ||
4450 | * prevents us from getting stuck on 9 if the load is 10, for | ||
4451 | * example. | ||
4452 | */ | ||
4453 | if (new_load > old_load) | ||
4454 | new_load += scale - 1; | ||
4455 | |||
4456 | this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; | ||
4457 | } | ||
4458 | |||
4459 | sched_avg_update(this_rq); | ||
4460 | } | ||
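Each cpu_load[i] above is an exponentially weighted average that keeps (2^i - 1)/2^i of the old value per tick; the "scale - 1" addition rounds the division up when load rises so the average can actually reach the new value instead of converging one below it. A small sketch of that single update step with the runqueue stripped away:

    #include <stdio.h>

    #define CPU_LOAD_IDX_MAX 5

    /* One tick of the cpu_load[] update for the current load 'this_load'. */
    static void update_cpu_load_once(unsigned long cpu_load[CPU_LOAD_IDX_MAX],
                                     unsigned long this_load)
    {
        unsigned long scale;
        int i;

        cpu_load[0] = this_load;    /* idx 0 tracks the instantaneous load */
        for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
            unsigned long old_load = cpu_load[i];
            unsigned long new_load = this_load;

            /* Round up when increasing, so e.g. 9 can still climb to 10. */
            if (new_load > old_load)
                new_load += scale - 1;

            cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
    }

    int main(void)
    {
        unsigned long cpu_load[CPU_LOAD_IDX_MAX] = { 0 };

        for (int tick = 0; tick < 8; tick++) {
            update_cpu_load_once(cpu_load, 10);   /* constant load of 10 */
            printf("tick %d: %lu %lu %lu %lu %lu\n", tick, cpu_load[0],
                   cpu_load[1], cpu_load[2], cpu_load[3], cpu_load[4]);
        }
        return 0;
    }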
4461 | |||
4462 | #ifdef CONFIG_NO_HZ_COMMON | ||
4463 | /* | ||
4464 | * There is no sane way to deal with nohz on smp when using jiffies because the | ||
4465 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading | ||
4466 | * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. | ||
4467 | * | ||
4468 | * Therefore we cannot use the delta approach from the regular tick since that | ||
4469 | * would seriously skew the load calculation. However we'll make do for those | ||
4470 | * updates happening while idle (nohz_idle_balance) or coming out of idle | ||
4471 | * (tick_nohz_idle_exit). | ||
4472 | * | ||
4473 | * This means we might still be one tick off for nohz periods. | ||
4474 | */ | ||
4475 | |||
4476 | /* | ||
4477 | * Called from nohz_idle_balance() to update the load ratings before doing the | ||
4478 | * idle balance. | ||
4479 | */ | ||
4480 | static void update_idle_cpu_load(struct rq *this_rq) | ||
4481 | { | ||
4482 | unsigned long curr_jiffies = READ_ONCE(jiffies); | ||
4483 | unsigned long load = this_rq->cfs.runnable_load_avg; | ||
4484 | unsigned long pending_updates; | ||
4485 | |||
4486 | /* | ||
4487 | * bail if there's load or we're actually up-to-date. | ||
4488 | */ | ||
4489 | if (load || curr_jiffies == this_rq->last_load_update_tick) | ||
4490 | return; | ||
4491 | |||
4492 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
4493 | this_rq->last_load_update_tick = curr_jiffies; | ||
4494 | |||
4495 | __update_cpu_load(this_rq, load, pending_updates); | ||
4496 | } | ||
4497 | |||
4498 | /* | ||
4499 | * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. | ||
4500 | */ | ||
4501 | void update_cpu_load_nohz(void) | ||
4502 | { | ||
4503 | struct rq *this_rq = this_rq(); | ||
4504 | unsigned long curr_jiffies = READ_ONCE(jiffies); | ||
4505 | unsigned long pending_updates; | ||
4506 | |||
4507 | if (curr_jiffies == this_rq->last_load_update_tick) | ||
4508 | return; | ||
4509 | |||
4510 | raw_spin_lock(&this_rq->lock); | ||
4511 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
4512 | if (pending_updates) { | ||
4513 | this_rq->last_load_update_tick = curr_jiffies; | ||
4514 | /* | ||
4515 | * We were idle, which means load 0; the current load might be | ||
4516 | * !0 due to remote wakeups and the like. | ||
4517 | */ | ||
4518 | __update_cpu_load(this_rq, 0, pending_updates); | ||
4519 | } | ||
4520 | raw_spin_unlock(&this_rq->lock); | ||
4521 | } | ||
4522 | #endif /* CONFIG_NO_HZ */ | ||
4523 | |||
4524 | /* | ||
4525 | * Called from scheduler_tick() | ||
4526 | */ | ||
4527 | void update_cpu_load_active(struct rq *this_rq) | ||
4528 | { | ||
4529 | unsigned long load = this_rq->cfs.runnable_load_avg; | ||
4530 | /* | ||
4531 | * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). | ||
4532 | */ | ||
4533 | this_rq->last_load_update_tick = jiffies; | ||
4534 | __update_cpu_load(this_rq, load, 1); | ||
4535 | } | ||
4536 | |||
4326 | /* Used instead of source_load when we know the type == 0 */ | 4537 | /* Used instead of source_load when we know the type == 0 */ |
4327 | static unsigned long weighted_cpuload(const int cpu) | 4538 | static unsigned long weighted_cpuload(const int cpu) |
4328 | { | 4539 | { |
@@ -4375,7 +4586,7 @@ static unsigned long capacity_orig_of(int cpu) | |||
4375 | static unsigned long cpu_avg_load_per_task(int cpu) | 4586 | static unsigned long cpu_avg_load_per_task(int cpu) |
4376 | { | 4587 | { |
4377 | struct rq *rq = cpu_rq(cpu); | 4588 | struct rq *rq = cpu_rq(cpu); |
4378 | unsigned long nr_running = ACCESS_ONCE(rq->cfs.h_nr_running); | 4589 | unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running); |
4379 | unsigned long load_avg = rq->cfs.runnable_load_avg; | 4590 | unsigned long load_avg = rq->cfs.runnable_load_avg; |
4380 | 4591 | ||
4381 | if (nr_running) | 4592 | if (nr_running) |
@@ -5126,18 +5337,21 @@ again: | |||
5126 | * entity, update_curr() will update its vruntime, otherwise | 5337 | * entity, update_curr() will update its vruntime, otherwise |
5127 | * forget we've ever seen it. | 5338 | * forget we've ever seen it. |
5128 | */ | 5339 | */ |
5129 | if (curr && curr->on_rq) | 5340 | if (curr) { |
5130 | update_curr(cfs_rq); | 5341 | if (curr->on_rq) |
5131 | else | 5342 | update_curr(cfs_rq); |
5132 | curr = NULL; | 5343 | else |
5344 | curr = NULL; | ||
5133 | 5345 | ||
5134 | /* | 5346 | /* |
5135 | * This call to check_cfs_rq_runtime() will do the throttle and | 5347 | * This call to check_cfs_rq_runtime() will do the |
5136 | * dequeue its entity in the parent(s). Therefore the 'simple' | 5348 | * throttle and dequeue its entity in the parent(s). |
5137 | * nr_running test will indeed be correct. | 5349 | * Therefore the 'simple' nr_running test will indeed |
5138 | */ | 5350 | * be correct. |
5139 | if (unlikely(check_cfs_rq_runtime(cfs_rq))) | 5351 | */ |
5140 | goto simple; | 5352 | if (unlikely(check_cfs_rq_runtime(cfs_rq))) |
5353 | goto simple; | ||
5354 | } | ||
5141 | 5355 | ||
5142 | se = pick_next_entity(cfs_rq, curr); | 5356 | se = pick_next_entity(cfs_rq, curr); |
5143 | cfs_rq = group_cfs_rq(se); | 5357 | cfs_rq = group_cfs_rq(se); |
@@ -5467,10 +5681,15 @@ static int task_hot(struct task_struct *p, struct lb_env *env) | |||
5467 | } | 5681 | } |
5468 | 5682 | ||
5469 | #ifdef CONFIG_NUMA_BALANCING | 5683 | #ifdef CONFIG_NUMA_BALANCING |
5470 | /* Returns true if the destination node has incurred more faults */ | 5684 | /* |
5685 | * Returns true if the destination node is the preferred node. | ||
5686 | * Needs to match fbq_classify_rq(): if there is a runnable task | ||
5687 | * that is not on its preferred node, we should identify it. | ||
5688 | */ | ||
5471 | static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | 5689 | static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) |
5472 | { | 5690 | { |
5473 | struct numa_group *numa_group = rcu_dereference(p->numa_group); | 5691 | struct numa_group *numa_group = rcu_dereference(p->numa_group); |
5692 | unsigned long src_faults, dst_faults; | ||
5474 | int src_nid, dst_nid; | 5693 | int src_nid, dst_nid; |
5475 | 5694 | ||
5476 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || | 5695 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || |
@@ -5484,29 +5703,30 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | |||
5484 | if (src_nid == dst_nid) | 5703 | if (src_nid == dst_nid) |
5485 | return false; | 5704 | return false; |
5486 | 5705 | ||
5487 | if (numa_group) { | ||
5488 | /* Task is already in the group's interleave set. */ | ||
5489 | if (node_isset(src_nid, numa_group->active_nodes)) | ||
5490 | return false; | ||
5491 | |||
5492 | /* Task is moving into the group's interleave set. */ | ||
5493 | if (node_isset(dst_nid, numa_group->active_nodes)) | ||
5494 | return true; | ||
5495 | |||
5496 | return group_faults(p, dst_nid) > group_faults(p, src_nid); | ||
5497 | } | ||
5498 | |||
5499 | /* Encourage migration to the preferred node. */ | 5706 | /* Encourage migration to the preferred node. */ |
5500 | if (dst_nid == p->numa_preferred_nid) | 5707 | if (dst_nid == p->numa_preferred_nid) |
5501 | return true; | 5708 | return true; |
5502 | 5709 | ||
5503 | return task_faults(p, dst_nid) > task_faults(p, src_nid); | 5710 | /* Migrating away from the preferred node is bad. */ |
5711 | if (src_nid == p->numa_preferred_nid) | ||
5712 | return false; | ||
5713 | |||
5714 | if (numa_group) { | ||
5715 | src_faults = group_faults(p, src_nid); | ||
5716 | dst_faults = group_faults(p, dst_nid); | ||
5717 | } else { | ||
5718 | src_faults = task_faults(p, src_nid); | ||
5719 | dst_faults = task_faults(p, dst_nid); | ||
5720 | } | ||
5721 | |||
5722 | return dst_faults > src_faults; | ||
5504 | } | 5723 | } |
5505 | 5724 | ||
5506 | 5725 | ||
5507 | static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | 5726 | static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) |
5508 | { | 5727 | { |
5509 | struct numa_group *numa_group = rcu_dereference(p->numa_group); | 5728 | struct numa_group *numa_group = rcu_dereference(p->numa_group); |
5729 | unsigned long src_faults, dst_faults; | ||
5510 | int src_nid, dst_nid; | 5730 | int src_nid, dst_nid; |
5511 | 5731 | ||
5512 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) | 5732 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) |
@@ -5521,23 +5741,23 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | |||
5521 | if (src_nid == dst_nid) | 5741 | if (src_nid == dst_nid) |
5522 | return false; | 5742 | return false; |
5523 | 5743 | ||
5524 | if (numa_group) { | 5744 | /* Migrating away from the preferred node is bad. */ |
5525 | /* Task is moving within/into the group's interleave set. */ | 5745 | if (src_nid == p->numa_preferred_nid) |
5526 | if (node_isset(dst_nid, numa_group->active_nodes)) | 5746 | return true; |
5527 | return false; | ||
5528 | 5747 | ||
5529 | /* Task is moving out of the group's interleave set. */ | 5748 | /* Encourage migration to the preferred node. */ |
5530 | if (node_isset(src_nid, numa_group->active_nodes)) | 5749 | if (dst_nid == p->numa_preferred_nid) |
5531 | return true; | 5750 | return false; |
5532 | 5751 | ||
5533 | return group_faults(p, dst_nid) < group_faults(p, src_nid); | 5752 | if (numa_group) { |
5753 | src_faults = group_faults(p, src_nid); | ||
5754 | dst_faults = group_faults(p, dst_nid); | ||
5755 | } else { | ||
5756 | src_faults = task_faults(p, src_nid); | ||
5757 | dst_faults = task_faults(p, dst_nid); | ||
5534 | } | 5758 | } |
5535 | 5759 | ||
5536 | /* Migrating away from the preferred node is always bad. */ | 5760 | return dst_faults < src_faults; |
5537 | if (src_nid == p->numa_preferred_nid) | ||
5538 | return true; | ||
5539 | |||
5540 | return task_faults(p, dst_nid) < task_faults(p, src_nid); | ||
5541 | } | 5761 | } |
5542 | 5762 | ||
5543 | #else | 5763 | #else |
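Both rewritten helpers above now follow the same shape: bail out for same-node moves, answer immediately when either end is the task's preferred node, and only then fall back to comparing fault counts, using the group's counts whenever the task is in a numa_group. A condensed userspace sketch of that decision order; the struct and helpers are hypothetical stand-ins, not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_NODES 4

    /* Hypothetical, flattened view of the fields the kernel helpers consult. */
    struct task_numa {
        int preferred_nid;
        bool has_group;
        unsigned long task_faults[MAX_NODES];
        unsigned long group_faults[MAX_NODES];
    };

    static bool migration_improves_locality(const struct task_numa *p,
                                            int src_nid, int dst_nid)
    {
        unsigned long src_faults, dst_faults;

        if (src_nid == dst_nid)
            return false;

        /* Encourage migration to the preferred node. */
        if (dst_nid == p->preferred_nid)
            return true;

        /* Migrating away from the preferred node is bad. */
        if (src_nid == p->preferred_nid)
            return false;

        if (p->has_group) {
            src_faults = p->group_faults[src_nid];
            dst_faults = p->group_faults[dst_nid];
        } else {
            src_faults = p->task_faults[src_nid];
            dst_faults = p->task_faults[dst_nid];
        }
        return dst_faults > src_faults;
    }

    int main(void)
    {
        struct task_numa p = {
            .preferred_nid = 1,
            .has_group = false,
            .task_faults = { 10, 50, 30, 5 },
        };

        printf("0 -> 1: %d\n", migration_improves_locality(&p, 0, 1)); /* 1 */
        printf("1 -> 2: %d\n", migration_improves_locality(&p, 1, 2)); /* 0 */
        printf("0 -> 2: %d\n", migration_improves_locality(&p, 0, 2)); /* 1 */
        return 0;
    }

The "degrades" variant is the mirror image: the two preferred-node checks return the opposite answers and the final comparison uses "<" instead of ">".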
@@ -6037,8 +6257,8 @@ static unsigned long scale_rt_capacity(int cpu) | |||
6037 | * Since we're reading these variables without serialization make sure | 6257 | * Since we're reading these variables without serialization make sure |
6038 | * we read them once before doing sanity checks on them. | 6258 | * we read them once before doing sanity checks on them. |
6039 | */ | 6259 | */ |
6040 | age_stamp = ACCESS_ONCE(rq->age_stamp); | 6260 | age_stamp = READ_ONCE(rq->age_stamp); |
6041 | avg = ACCESS_ONCE(rq->rt_avg); | 6261 | avg = READ_ONCE(rq->rt_avg); |
6042 | delta = __rq_clock_broken(rq) - age_stamp; | 6262 | delta = __rq_clock_broken(rq) - age_stamp; |
6043 | 6263 | ||
6044 | if (unlikely(delta < 0)) | 6264 | if (unlikely(delta < 0)) |
diff --git a/kernel/sched/proc.c b/kernel/sched/loadavg.c index 8ecd552fe4f2..ef7159012cf3 100644 --- a/kernel/sched/proc.c +++ b/kernel/sched/loadavg.c | |||
@@ -1,7 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/sched/proc.c | 2 | * kernel/sched/loadavg.c |
3 | * | 3 | * |
4 | * Kernel load calculations, forked from sched/core.c | 4 | * This file contains the magic bits required to compute the global loadavg |
5 | * figure. It's a silly number but people think it's important. We go through | ||
6 | * great pains to make it work on big machines and tickless kernels. | ||
5 | */ | 7 | */ |
6 | 8 | ||
7 | #include <linux/export.h> | 9 | #include <linux/export.h> |
@@ -81,7 +83,7 @@ long calc_load_fold_active(struct rq *this_rq) | |||
81 | long nr_active, delta = 0; | 83 | long nr_active, delta = 0; |
82 | 84 | ||
83 | nr_active = this_rq->nr_running; | 85 | nr_active = this_rq->nr_running; |
84 | nr_active += (long) this_rq->nr_uninterruptible; | 86 | nr_active += (long)this_rq->nr_uninterruptible; |
85 | 87 | ||
86 | if (nr_active != this_rq->calc_load_active) { | 88 | if (nr_active != this_rq->calc_load_active) { |
87 | delta = nr_active - this_rq->calc_load_active; | 89 | delta = nr_active - this_rq->calc_load_active; |
@@ -186,6 +188,7 @@ void calc_load_enter_idle(void) | |||
186 | delta = calc_load_fold_active(this_rq); | 188 | delta = calc_load_fold_active(this_rq); |
187 | if (delta) { | 189 | if (delta) { |
188 | int idx = calc_load_write_idx(); | 190 | int idx = calc_load_write_idx(); |
191 | |||
189 | atomic_long_add(delta, &calc_load_idle[idx]); | 192 | atomic_long_add(delta, &calc_load_idle[idx]); |
190 | } | 193 | } |
191 | } | 194 | } |
@@ -241,18 +244,20 @@ fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | |||
241 | { | 244 | { |
242 | unsigned long result = 1UL << frac_bits; | 245 | unsigned long result = 1UL << frac_bits; |
243 | 246 | ||
244 | if (n) for (;;) { | 247 | if (n) { |
245 | if (n & 1) { | 248 | for (;;) { |
246 | result *= x; | 249 | if (n & 1) { |
247 | result += 1UL << (frac_bits - 1); | 250 | result *= x; |
248 | result >>= frac_bits; | 251 | result += 1UL << (frac_bits - 1); |
252 | result >>= frac_bits; | ||
253 | } | ||
254 | n >>= 1; | ||
255 | if (!n) | ||
256 | break; | ||
257 | x *= x; | ||
258 | x += 1UL << (frac_bits - 1); | ||
259 | x >>= frac_bits; | ||
249 | } | 260 | } |
250 | n >>= 1; | ||
251 | if (!n) | ||
252 | break; | ||
253 | x *= x; | ||
254 | x += 1UL << (frac_bits - 1); | ||
255 | x >>= frac_bits; | ||
256 | } | 261 | } |
257 | 262 | ||
258 | return result; | 263 | return result; |
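fixed_power_int() above is exponentiation by squaring in frac_bits fixed point, and calc_load_n() uses it to fold n missed load-average periods into a single step. A self-contained sketch built on the constants the kernel's loadavg code normally uses (FSHIFT = 11, FIXED_1 = 2048, EXP_1 = 1884 for the 1-minute average); the calc_load() here mirrors the usual avenrun update, so iterating it n times and folding once via fixed_power_int() should agree to within rounding:

    #include <stdio.h>

    #define FSHIFT  11                  /* bits of fixed-point precision */
    #define FIXED_1 (1 << FSHIFT)       /* 1.0 in fixed point */
    #define EXP_1   1884                /* 1/exp(5sec/1min), fixed point */

    /* x^n in fixed point by squaring: O(log n) multiplies, as in the hunk above. */
    static unsigned long
    fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
    {
        unsigned long result = 1UL << frac_bits;

        if (n) {
            for (;;) {
                if (n & 1) {
                    result *= x;
                    result += 1UL << (frac_bits - 1);
                    result >>= frac_bits;
                }
                n >>= 1;
                if (!n)
                    break;
                x *= x;
                x += 1UL << (frac_bits - 1);
                x >>= frac_bits;
            }
        }
        return result;
    }

    /* One load-average period: load = load * exp + active * (1 - exp). */
    static unsigned long
    calc_load(unsigned long load, unsigned long exp, unsigned long active)
    {
        unsigned long newload = load * exp + active * (FIXED_1 - exp);

        if (active >= load)
            newload += FIXED_1 - 1;     /* round up while rising */
        return newload / FIXED_1;
    }

    int main(void)
    {
        unsigned long active = 4 * FIXED_1; /* 4 runnable tasks, fixed point */
        unsigned long load = 0;

        for (int i = 0; i < 12; i++)        /* 12 periods, one at a time */
            load = calc_load(load, EXP_1, active);
        printf("12 single steps: %lu.%02lu\n",
               load >> FSHIFT, ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);

        load = calc_load(0, fixed_power_int(EXP_1, FSHIFT, 12), active);
        printf("folded (n = 12): %lu.%02lu\n",
               load >> FSHIFT, ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);
        return 0;
    }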
@@ -285,7 +290,6 @@ static unsigned long | |||
285 | calc_load_n(unsigned long load, unsigned long exp, | 290 | calc_load_n(unsigned long load, unsigned long exp, |
286 | unsigned long active, unsigned int n) | 291 | unsigned long active, unsigned int n) |
287 | { | 292 | { |
288 | |||
289 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | 293 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); |
290 | } | 294 | } |
291 | 295 | ||
@@ -339,6 +343,8 @@ static inline void calc_global_nohz(void) { } | |||
339 | /* | 343 | /* |
340 | * calc_load - update the avenrun load estimates 10 ticks after the | 344 | * calc_load - update the avenrun load estimates 10 ticks after the |
341 | * CPUs have updated calc_load_tasks. | 345 | * CPUs have updated calc_load_tasks. |
346 | * | ||
347 | * Called from the global timer code. | ||
342 | */ | 348 | */ |
343 | void calc_global_load(unsigned long ticks) | 349 | void calc_global_load(unsigned long ticks) |
344 | { | 350 | { |
@@ -370,10 +376,10 @@ void calc_global_load(unsigned long ticks) | |||
370 | } | 376 | } |
371 | 377 | ||
372 | /* | 378 | /* |
373 | * Called from update_cpu_load() to periodically update this CPU's | 379 | * Called from scheduler_tick() to periodically update this CPU's |
374 | * active count. | 380 | * active count. |
375 | */ | 381 | */ |
376 | static void calc_load_account_active(struct rq *this_rq) | 382 | void calc_global_load_tick(struct rq *this_rq) |
377 | { | 383 | { |
378 | long delta; | 384 | long delta; |
379 | 385 | ||
@@ -386,199 +392,3 @@ static void calc_load_account_active(struct rq *this_rq) | |||
386 | 392 | ||
387 | this_rq->calc_load_update += LOAD_FREQ; | 393 | this_rq->calc_load_update += LOAD_FREQ; |
388 | } | 394 | } |
389 | |||
390 | /* | ||
391 | * End of global load-average stuff | ||
392 | */ | ||
393 | |||
394 | /* | ||
395 | * The exact cpuload at various idx values, calculated at every tick would be | ||
396 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load | ||
397 | * | ||
398 | * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called | ||
399 | * on nth tick when cpu may be busy, then we have: | ||
400 | * load = ((2^idx - 1) / 2^idx)^(n-1) * load | ||
401 | * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load | ||
402 | * | ||
403 | * decay_load_missed() below does efficient calculation of | ||
404 | * load = ((2^idx - 1) / 2^idx)^(n-1) * load | ||
405 | * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load | ||
406 | * | ||
407 | * The calculation is approximated on a 128 point scale. | ||
408 | * degrade_zero_ticks is the number of ticks after which load at any | ||
409 | * particular idx is approximated to be zero. | ||
410 | * degrade_factor is a precomputed table, a row for each load idx. | ||
411 | * Each column corresponds to degradation factor for a power of two ticks, | ||
412 | * based on 128 point scale. | ||
413 | * Example: | ||
414 | * row 2, col 3 (=12) says that the degradation at load idx 2 after | ||
415 | * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8). | ||
416 | * | ||
417 | * With this power of 2 load factors, we can degrade the load n times | ||
418 | * by looking at 1 bits in n and doing as many mult/shift instead of | ||
419 | * n mult/shifts needed by the exact degradation. | ||
420 | */ | ||
421 | #define DEGRADE_SHIFT 7 | ||
422 | static const unsigned char | ||
423 | degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128}; | ||
424 | static const unsigned char | ||
425 | degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = { | ||
426 | {0, 0, 0, 0, 0, 0, 0, 0}, | ||
427 | {64, 32, 8, 0, 0, 0, 0, 0}, | ||
428 | {96, 72, 40, 12, 1, 0, 0}, | ||
429 | {112, 98, 75, 43, 15, 1, 0}, | ||
430 | {120, 112, 98, 76, 45, 16, 2} }; | ||
431 | |||
432 | /* | ||
433 | * Update cpu_load for any missed ticks, due to tickless idle. The backlog | ||
434 | * would be when CPU is idle and so we just decay the old load without | ||
435 | * adding any new load. | ||
436 | */ | ||
437 | static unsigned long | ||
438 | decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) | ||
439 | { | ||
440 | int j = 0; | ||
441 | |||
442 | if (!missed_updates) | ||
443 | return load; | ||
444 | |||
445 | if (missed_updates >= degrade_zero_ticks[idx]) | ||
446 | return 0; | ||
447 | |||
448 | if (idx == 1) | ||
449 | return load >> missed_updates; | ||
450 | |||
451 | while (missed_updates) { | ||
452 | if (missed_updates % 2) | ||
453 | load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT; | ||
454 | |||
455 | missed_updates >>= 1; | ||
456 | j++; | ||
457 | } | ||
458 | return load; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Update rq->cpu_load[] statistics. This function is usually called every | ||
463 | * scheduler tick (TICK_NSEC). With tickless idle this will not be called | ||
464 | * every tick. We fix it up based on jiffies. | ||
465 | */ | ||
466 | static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, | ||
467 | unsigned long pending_updates) | ||
468 | { | ||
469 | int i, scale; | ||
470 | |||
471 | this_rq->nr_load_updates++; | ||
472 | |||
473 | /* Update our load: */ | ||
474 | this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ | ||
475 | for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { | ||
476 | unsigned long old_load, new_load; | ||
477 | |||
478 | /* scale is effectively 1 << i now, and >> i divides by scale */ | ||
479 | |||
480 | old_load = this_rq->cpu_load[i]; | ||
481 | old_load = decay_load_missed(old_load, pending_updates - 1, i); | ||
482 | new_load = this_load; | ||
483 | /* | ||
484 | * Round up the averaging division if load is increasing. This | ||
485 | * prevents us from getting stuck on 9 if the load is 10, for | ||
486 | * example. | ||
487 | */ | ||
488 | if (new_load > old_load) | ||
489 | new_load += scale - 1; | ||
490 | |||
491 | this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; | ||
492 | } | ||
493 | |||
494 | sched_avg_update(this_rq); | ||
495 | } | ||
496 | |||
497 | #ifdef CONFIG_SMP | ||
498 | static inline unsigned long get_rq_runnable_load(struct rq *rq) | ||
499 | { | ||
500 | return rq->cfs.runnable_load_avg; | ||
501 | } | ||
502 | #else | ||
503 | static inline unsigned long get_rq_runnable_load(struct rq *rq) | ||
504 | { | ||
505 | return rq->load.weight; | ||
506 | } | ||
507 | #endif | ||
508 | |||
509 | #ifdef CONFIG_NO_HZ_COMMON | ||
510 | /* | ||
511 | * There is no sane way to deal with nohz on smp when using jiffies because the | ||
512 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading | ||
513 | * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. | ||
514 | * | ||
515 | * Therefore we cannot use the delta approach from the regular tick since that | ||
516 | * would seriously skew the load calculation. However we'll make do for those | ||
517 | * updates happening while idle (nohz_idle_balance) or coming out of idle | ||
518 | * (tick_nohz_idle_exit). | ||
519 | * | ||
520 | * This means we might still be one tick off for nohz periods. | ||
521 | */ | ||
522 | |||
523 | /* | ||
524 | * Called from nohz_idle_balance() to update the load ratings before doing the | ||
525 | * idle balance. | ||
526 | */ | ||
527 | void update_idle_cpu_load(struct rq *this_rq) | ||
528 | { | ||
529 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); | ||
530 | unsigned long load = get_rq_runnable_load(this_rq); | ||
531 | unsigned long pending_updates; | ||
532 | |||
533 | /* | ||
534 | * bail if there's load or we're actually up-to-date. | ||
535 | */ | ||
536 | if (load || curr_jiffies == this_rq->last_load_update_tick) | ||
537 | return; | ||
538 | |||
539 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
540 | this_rq->last_load_update_tick = curr_jiffies; | ||
541 | |||
542 | __update_cpu_load(this_rq, load, pending_updates); | ||
543 | } | ||
544 | |||
545 | /* | ||
546 | * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. | ||
547 | */ | ||
548 | void update_cpu_load_nohz(void) | ||
549 | { | ||
550 | struct rq *this_rq = this_rq(); | ||
551 | unsigned long curr_jiffies = ACCESS_ONCE(jiffies); | ||
552 | unsigned long pending_updates; | ||
553 | |||
554 | if (curr_jiffies == this_rq->last_load_update_tick) | ||
555 | return; | ||
556 | |||
557 | raw_spin_lock(&this_rq->lock); | ||
558 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
559 | if (pending_updates) { | ||
560 | this_rq->last_load_update_tick = curr_jiffies; | ||
561 | /* | ||
562 | * We were idle, this means load 0, the current load might be | ||
563 | * !0 due to remote wakeups and the sort. | ||
564 | */ | ||
565 | __update_cpu_load(this_rq, 0, pending_updates); | ||
566 | } | ||
567 | raw_spin_unlock(&this_rq->lock); | ||
568 | } | ||
569 | #endif /* CONFIG_NO_HZ */ | ||
570 | |||
571 | /* | ||
572 | * Called from scheduler_tick() | ||
573 | */ | ||
574 | void update_cpu_load_active(struct rq *this_rq) | ||
575 | { | ||
576 | unsigned long load = get_rq_runnable_load(this_rq); | ||
577 | /* | ||
578 | * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). | ||
579 | */ | ||
580 | this_rq->last_load_update_tick = jiffies; | ||
581 | __update_cpu_load(this_rq, load, 1); | ||
582 | |||
583 | calc_load_account_active(this_rq); | ||
584 | } | ||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 575da76a3874..560d2fa623c3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -1323,7 +1323,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
1323 | rq = cpu_rq(cpu); | 1323 | rq = cpu_rq(cpu); |
1324 | 1324 | ||
1325 | rcu_read_lock(); | 1325 | rcu_read_lock(); |
1326 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | 1326 | curr = READ_ONCE(rq->curr); /* unlocked access */ |
1327 | 1327 | ||
1328 | /* | 1328 | /* |
1329 | * If the current task on @p's runqueue is an RT task, then | 1329 | * If the current task on @p's runqueue is an RT task, then |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e0e129993958..d62b2882232b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -26,8 +26,14 @@ extern __read_mostly int scheduler_running; | |||
26 | extern unsigned long calc_load_update; | 26 | extern unsigned long calc_load_update; |
27 | extern atomic_long_t calc_load_tasks; | 27 | extern atomic_long_t calc_load_tasks; |
28 | 28 | ||
29 | extern void calc_global_load_tick(struct rq *this_rq); | ||
29 | extern long calc_load_fold_active(struct rq *this_rq); | 30 | extern long calc_load_fold_active(struct rq *this_rq); |
31 | |||
32 | #ifdef CONFIG_SMP | ||
30 | extern void update_cpu_load_active(struct rq *this_rq); | 33 | extern void update_cpu_load_active(struct rq *this_rq); |
34 | #else | ||
35 | static inline void update_cpu_load_active(struct rq *this_rq) { } | ||
36 | #endif | ||
31 | 37 | ||
32 | /* | 38 | /* |
33 | * Helpers for converting nanosecond timing to jiffy resolution | 39 | * Helpers for converting nanosecond timing to jiffy resolution |
@@ -707,7 +713,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | |||
707 | 713 | ||
708 | static inline u64 __rq_clock_broken(struct rq *rq) | 714 | static inline u64 __rq_clock_broken(struct rq *rq) |
709 | { | 715 | { |
710 | return ACCESS_ONCE(rq->clock); | 716 | return READ_ONCE(rq->clock); |
711 | } | 717 | } |
712 | 718 | ||
713 | static inline u64 rq_clock(struct rq *rq) | 719 | static inline u64 rq_clock(struct rq *rq) |
@@ -1284,7 +1290,6 @@ extern void update_max_interval(void); | |||
1284 | extern void init_sched_dl_class(void); | 1290 | extern void init_sched_dl_class(void); |
1285 | extern void init_sched_rt_class(void); | 1291 | extern void init_sched_rt_class(void); |
1286 | extern void init_sched_fair_class(void); | 1292 | extern void init_sched_fair_class(void); |
1287 | extern void init_sched_dl_class(void); | ||
1288 | 1293 | ||
1289 | extern void resched_curr(struct rq *rq); | 1294 | extern void resched_curr(struct rq *rq); |
1290 | extern void resched_cpu(int cpu); | 1295 | extern void resched_cpu(int cpu); |
@@ -1298,8 +1303,6 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se); | |||
1298 | 1303 | ||
1299 | unsigned long to_ratio(u64 period, u64 runtime); | 1304 | unsigned long to_ratio(u64 period, u64 runtime); |
1300 | 1305 | ||
1301 | extern void update_idle_cpu_load(struct rq *this_rq); | ||
1302 | |||
1303 | extern void init_task_runnable_average(struct task_struct *p); | 1306 | extern void init_task_runnable_average(struct task_struct *p); |
1304 | 1307 | ||
1305 | static inline void add_nr_running(struct rq *rq, unsigned count) | 1308 | static inline void add_nr_running(struct rq *rq, unsigned count) |
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 4ab704339656..077ebbd5e10f 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
@@ -174,7 +174,8 @@ static inline bool cputimer_running(struct task_struct *tsk) | |||
174 | { | 174 | { |
175 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 175 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
176 | 176 | ||
177 | if (!cputimer->running) | 177 | /* Check if cputimer isn't running. This is accessed without locking. */ |
178 | if (!READ_ONCE(cputimer->running)) | ||
178 | return false; | 179 | return false; |
179 | 180 | ||
180 | /* | 181 | /* |
@@ -215,9 +216,7 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
215 | if (!cputimer_running(tsk)) | 216 | if (!cputimer_running(tsk)) |
216 | return; | 217 | return; |
217 | 218 | ||
218 | raw_spin_lock(&cputimer->lock); | 219 | atomic64_add(cputime, &cputimer->cputime_atomic.utime); |
219 | cputimer->cputime.utime += cputime; | ||
220 | raw_spin_unlock(&cputimer->lock); | ||
221 | } | 220 | } |
222 | 221 | ||
223 | /** | 222 | /** |
@@ -238,9 +237,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
238 | if (!cputimer_running(tsk)) | 237 | if (!cputimer_running(tsk)) |
239 | return; | 238 | return; |
240 | 239 | ||
241 | raw_spin_lock(&cputimer->lock); | 240 | atomic64_add(cputime, &cputimer->cputime_atomic.stime); |
242 | cputimer->cputime.stime += cputime; | ||
243 | raw_spin_unlock(&cputimer->lock); | ||
244 | } | 241 | } |
245 | 242 | ||
246 | /** | 243 | /** |
@@ -261,7 +258,5 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
261 | if (!cputimer_running(tsk)) | 258 | if (!cputimer_running(tsk)) |
262 | return; | 259 | return; |
263 | 260 | ||
264 | raw_spin_lock(&cputimer->lock); | 261 | atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); |
265 | cputimer->cputime.sum_exec_runtime += ns; | ||
266 | raw_spin_unlock(&cputimer->lock); | ||
267 | } | 262 | } |
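The three stats.h hunks above all make the same change: instead of taking cputimer->lock around a plain "+=", each accounting path issues one atomic64_add() into the new cputime_atomic fields, which is the thread_group_cputimer() scalability work mentioned in the merge description. A userspace sketch of that transformation using C11 <stdatomic.h>; the type and function names are illustrative, not the kernel's:

    #include <stdatomic.h>
    #include <stdio.h>

    /*
     * Before: three plain counters guarded by one spinlock.
     * After:  three independent atomic counters, no lock on the hot path.
     */
    struct cputime_counters {
        atomic_ullong utime;
        atomic_ullong stime;
        atomic_ullong sum_exec_runtime;
    };

    static struct cputime_counters ct;      /* zero-initialized */

    static void account_user_time(unsigned long long delta)
    {
        /* Relaxed ordering suffices: these are statistics that readers
         * sample individually, much like the kernel counters here. */
        atomic_fetch_add_explicit(&ct.utime, delta, memory_order_relaxed);
    }

    int main(void)
    {
        account_user_time(1000);
        account_user_time(500);
        printf("utime = %llu\n", (unsigned long long)atomic_load(&ct.utime));
        return 0;
    }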
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9bc82329eaad..052e02672d12 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bit_wait_io); | |||
601 | 601 | ||
602 | __sched int bit_wait_timeout(struct wait_bit_key *word) | 602 | __sched int bit_wait_timeout(struct wait_bit_key *word) |
603 | { | 603 | { |
604 | unsigned long now = ACCESS_ONCE(jiffies); | 604 | unsigned long now = READ_ONCE(jiffies); |
605 | if (signal_pending_state(current->state, current)) | 605 | if (signal_pending_state(current->state, current)) |
606 | return 1; | 606 | return 1; |
607 | if (time_after_eq(now, word->timeout)) | 607 | if (time_after_eq(now, word->timeout)) |
@@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout); | |||
613 | 613 | ||
614 | __sched int bit_wait_io_timeout(struct wait_bit_key *word) | 614 | __sched int bit_wait_io_timeout(struct wait_bit_key *word) |
615 | { | 615 | { |
616 | unsigned long now = ACCESS_ONCE(jiffies); | 616 | unsigned long now = READ_ONCE(jiffies); |
617 | if (signal_pending_state(current->state, current)) | 617 | if (signal_pending_state(current->state, current)) |
618 | return 1; | 618 | return 1; |
619 | if (time_after_eq(now, word->timeout)) | 619 | if (time_after_eq(now, word->timeout)) |
diff --git a/kernel/signal.c b/kernel/signal.c index d51c5ddd855c..f19833b5db3c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -245,7 +245,7 @@ static inline void print_dropped_signal(int sig) | |||
245 | * RETURNS: | 245 | * RETURNS: |
246 | * %true if @mask is set, %false if made noop because @task was dying. | 246 | * %true if @mask is set, %false if made noop because @task was dying. |
247 | */ | 247 | */ |
248 | bool task_set_jobctl_pending(struct task_struct *task, unsigned int mask) | 248 | bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) |
249 | { | 249 | { |
250 | BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | | 250 | BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | |
251 | JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); | 251 | JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); |
@@ -297,7 +297,7 @@ void task_clear_jobctl_trapping(struct task_struct *task) | |||
297 | * CONTEXT: | 297 | * CONTEXT: |
298 | * Must be called with @task->sighand->siglock held. | 298 | * Must be called with @task->sighand->siglock held. |
299 | */ | 299 | */ |
300 | void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask) | 300 | void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) |
301 | { | 301 | { |
302 | BUG_ON(mask & ~JOBCTL_PENDING_MASK); | 302 | BUG_ON(mask & ~JOBCTL_PENDING_MASK); |
303 | 303 | ||
@@ -2000,7 +2000,7 @@ static bool do_signal_stop(int signr) | |||
2000 | struct signal_struct *sig = current->signal; | 2000 | struct signal_struct *sig = current->signal; |
2001 | 2001 | ||
2002 | if (!(current->jobctl & JOBCTL_STOP_PENDING)) { | 2002 | if (!(current->jobctl & JOBCTL_STOP_PENDING)) { |
2003 | unsigned int gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; | 2003 | unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; |
2004 | struct task_struct *t; | 2004 | struct task_struct *t; |
2005 | 2005 | ||
2006 | /* signr will be recorded in task->jobctl for retries */ | 2006 | /* signr will be recorded in task->jobctl for retries */ |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 695f0c6cd169..fd643d8c4b42 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -211,25 +211,6 @@ static int multi_cpu_stop(void *data) | |||
211 | return err; | 211 | return err; |
212 | } | 212 | } |
213 | 213 | ||
214 | struct irq_cpu_stop_queue_work_info { | ||
215 | int cpu1; | ||
216 | int cpu2; | ||
217 | struct cpu_stop_work *work1; | ||
218 | struct cpu_stop_work *work2; | ||
219 | }; | ||
220 | |||
221 | /* | ||
222 | * This function is always run with irqs and preemption disabled. | ||
223 | * This guarantees that both work1 and work2 get queued, before | ||
224 | * our local migrate thread gets the chance to preempt us. | ||
225 | */ | ||
226 | static void irq_cpu_stop_queue_work(void *arg) | ||
227 | { | ||
228 | struct irq_cpu_stop_queue_work_info *info = arg; | ||
229 | cpu_stop_queue_work(info->cpu1, info->work1); | ||
230 | cpu_stop_queue_work(info->cpu2, info->work2); | ||
231 | } | ||
232 | |||
233 | /** | 214 | /** |
234 | * stop_two_cpus - stops two cpus | 215 | * stop_two_cpus - stops two cpus |
235 | * @cpu1: the cpu to stop | 216 | * @cpu1: the cpu to stop |
@@ -245,7 +226,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |||
245 | { | 226 | { |
246 | struct cpu_stop_done done; | 227 | struct cpu_stop_done done; |
247 | struct cpu_stop_work work1, work2; | 228 | struct cpu_stop_work work1, work2; |
248 | struct irq_cpu_stop_queue_work_info call_args; | ||
249 | struct multi_stop_data msdata; | 229 | struct multi_stop_data msdata; |
250 | 230 | ||
251 | preempt_disable(); | 231 | preempt_disable(); |
@@ -262,13 +242,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |||
262 | .done = &done | 242 | .done = &done |
263 | }; | 243 | }; |
264 | 244 | ||
265 | call_args = (struct irq_cpu_stop_queue_work_info){ | ||
266 | .cpu1 = cpu1, | ||
267 | .cpu2 = cpu2, | ||
268 | .work1 = &work1, | ||
269 | .work2 = &work2, | ||
270 | }; | ||
271 | |||
272 | cpu_stop_init_done(&done, 2); | 245 | cpu_stop_init_done(&done, 2); |
273 | set_state(&msdata, MULTI_STOP_PREPARE); | 246 | set_state(&msdata, MULTI_STOP_PREPARE); |
274 | 247 | ||
@@ -285,16 +258,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |||
285 | return -ENOENT; | 258 | return -ENOENT; |
286 | } | 259 | } |
287 | 260 | ||
288 | lg_local_lock(&stop_cpus_lock); | 261 | lg_double_lock(&stop_cpus_lock, cpu1, cpu2); |
289 | /* | 262 | cpu_stop_queue_work(cpu1, &work1); |
290 | * Queuing needs to be done by the lowest numbered CPU, to ensure | 263 | cpu_stop_queue_work(cpu2, &work2); |
291 | * that works are always queued in the same order on every CPU. | 264 | lg_double_unlock(&stop_cpus_lock, cpu1, cpu2); |
292 | * This prevents deadlocks. | 265 | |
293 | */ | ||
294 | smp_call_function_single(min(cpu1, cpu2), | ||
295 | &irq_cpu_stop_queue_work, | ||
296 | &call_args, 1); | ||
297 | lg_local_unlock(&stop_cpus_lock); | ||
298 | preempt_enable(); | 266 | preempt_enable(); |
299 | 267 | ||
300 | wait_for_completion(&done.completion); | 268 | wait_for_completion(&done.completion); |
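The stop_machine change above drops the smp_call_function_single() trampoline that forced queueing onto the lower-numbered CPU and instead takes both stopper locks at once with lg_double_lock(), which acquires the two per-CPU locks in one consistent order so two racing stop_two_cpus() callers cannot deadlock against each other. A generic pthread sketch of that lock-ordering discipline (the per-CPU array and indices are illustrative):

    #include <pthread.h>
    #include <stdio.h>

    #define NR_CPUS 4

    /* One lock per CPU, mirroring a per-CPU stopper lock. */
    static pthread_mutex_t cpu_lock[NR_CPUS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    };

    /*
     * Always take the lower-numbered lock first. Callers that grab two of
     * these locks in the same global order cannot form a cycle, so no
     * ABBA deadlock is possible.
     */
    static void double_lock(int cpu1, int cpu2)
    {
        int first  = cpu1 < cpu2 ? cpu1 : cpu2;
        int second = cpu1 < cpu2 ? cpu2 : cpu1;

        pthread_mutex_lock(&cpu_lock[first]);
        pthread_mutex_lock(&cpu_lock[second]);
    }

    static void double_unlock(int cpu1, int cpu2)
    {
        pthread_mutex_unlock(&cpu_lock[cpu1]);
        pthread_mutex_unlock(&cpu_lock[cpu2]);
    }

    int main(void)
    {
        /* Argument order no longer matters for deadlock avoidance. */
        double_lock(3, 1);
        printf("queued stopper work on CPUs 1 and 3\n");
        double_unlock(3, 1);

        double_lock(1, 3);
        printf("queued stopper work on CPUs 1 and 3 again\n");
        double_unlock(1, 3);
        return 0;
    }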
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0075da74abf0..892e3dae0aac 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
@@ -196,39 +196,62 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
196 | return 0; | 196 | return 0; |
197 | } | 197 | } |
198 | 198 | ||
199 | static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b) | 199 | /* |
200 | * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg | ||
201 | * to avoid race conditions with concurrent updates to cputime. | ||
202 | */ | ||
203 | static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime) | ||
200 | { | 204 | { |
201 | if (b->utime > a->utime) | 205 | u64 curr_cputime; |
202 | a->utime = b->utime; | 206 | retry: |
207 | curr_cputime = atomic64_read(cputime); | ||
208 | if (sum_cputime > curr_cputime) { | ||
209 | if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime) | ||
210 | goto retry; | ||
211 | } | ||
212 | } | ||
203 | 213 | ||
204 | if (b->stime > a->stime) | 214 | static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) |
205 | a->stime = b->stime; | 215 | { |
216 | __update_gt_cputime(&cputime_atomic->utime, sum->utime); | ||
217 | __update_gt_cputime(&cputime_atomic->stime, sum->stime); | ||
218 | __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); | ||
219 | } | ||
206 | 220 | ||
207 | if (b->sum_exec_runtime > a->sum_exec_runtime) | 221 | /* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */ |
208 | a->sum_exec_runtime = b->sum_exec_runtime; | 222 | static inline void sample_cputime_atomic(struct task_cputime *times, |
223 | struct task_cputime_atomic *atomic_times) | ||
224 | { | ||
225 | times->utime = atomic64_read(&atomic_times->utime); | ||
226 | times->stime = atomic64_read(&atomic_times->stime); | ||
227 | times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime); | ||
209 | } | 228 | } |
210 | 229 | ||
211 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | 230 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) |
212 | { | 231 | { |
213 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 232 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
214 | struct task_cputime sum; | 233 | struct task_cputime sum; |
215 | unsigned long flags; | ||
216 | 234 | ||
217 | if (!cputimer->running) { | 235 | /* Check if cputimer isn't running. This is accessed without locking. */ |
236 | if (!READ_ONCE(cputimer->running)) { | ||
218 | /* | 237 | /* |
219 | * The POSIX timer interface allows for absolute time expiry | 238 | * The POSIX timer interface allows for absolute time expiry |
220 | * values through the TIMER_ABSTIME flag, therefore we have | 239 | * values through the TIMER_ABSTIME flag, therefore we have |
221 | * to synchronize the timer to the clock every time we start | 240 | * to synchronize the timer to the clock every time we start it. |
222 | * it. | ||
223 | */ | 241 | */ |
224 | thread_group_cputime(tsk, &sum); | 242 | thread_group_cputime(tsk, &sum); |
225 | raw_spin_lock_irqsave(&cputimer->lock, flags); | 243 | update_gt_cputime(&cputimer->cputime_atomic, &sum); |
226 | cputimer->running = 1; | 244 | |
227 | update_gt_cputime(&cputimer->cputime, &sum); | 245 | /* |
228 | } else | 246 | * We're setting cputimer->running without a lock. Ensure |
229 | raw_spin_lock_irqsave(&cputimer->lock, flags); | 247 | * this only gets written to in one operation. We set |
230 | *times = cputimer->cputime; | 248 | * running after update_gt_cputime() as a small optimization, |
231 | raw_spin_unlock_irqrestore(&cputimer->lock, flags); | 249 | * but barriers are not required because update_gt_cputime() |
250 | * can handle concurrent updates. | ||
251 | */ | ||
252 | WRITE_ONCE(cputimer->running, 1); | ||
253 | } | ||
254 | sample_cputime_atomic(times, &cputimer->cputime_atomic); | ||
232 | } | 255 | } |
233 | 256 | ||
234 | /* | 257 | /* |
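__update_gt_cputime() above is a lock-free "raise to at least this sample" update: it re-reads the value and retries the cmpxchg until either its sample is stored or a concurrent writer has already stored something at least as large, so the cached cputime never moves backwards. The same pattern in portable C11 atomics (a sketch, not the kernel helper):

    #include <stdatomic.h>
    #include <stdio.h>

    /*
     * Monotonically raise *val to at least 'sample'. If another thread
     * updates *val between our load and our compare-exchange, the
     * exchange fails, 'cur' is refreshed with the competing value, and
     * the loop either retries or stops because *val is already >= sample.
     */
    static void update_gt(atomic_ullong *val, unsigned long long sample)
    {
        unsigned long long cur = atomic_load(val);

        while (sample > cur) {
            if (atomic_compare_exchange_weak(val, &cur, sample))
                break;
        }
    }

    int main(void)
    {
        atomic_ullong utime;

        atomic_init(&utime, 0);
        update_gt(&utime, 100);
        update_gt(&utime, 40);      /* smaller sample: no effect */
        update_gt(&utime, 250);
        printf("utime = %llu\n", (unsigned long long)atomic_load(&utime));
        return 0;
    }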
@@ -582,7 +605,8 @@ bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) | |||
582 | if (!task_cputime_zero(&tsk->cputime_expires)) | 605 | if (!task_cputime_zero(&tsk->cputime_expires)) |
583 | return false; | 606 | return false; |
584 | 607 | ||
585 | if (tsk->signal->cputimer.running) | 608 | /* Check if cputimer is running. This is accessed without locking. */ |
609 | if (READ_ONCE(tsk->signal->cputimer.running)) | ||
586 | return false; | 610 | return false; |
587 | 611 | ||
588 | return true; | 612 | return true; |
@@ -852,10 +876,10 @@ static void check_thread_timers(struct task_struct *tsk, | |||
852 | /* | 876 | /* |
853 | * Check for the special case thread timers. | 877 | * Check for the special case thread timers. |
854 | */ | 878 | */ |
855 | soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); | 879 | soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); |
856 | if (soft != RLIM_INFINITY) { | 880 | if (soft != RLIM_INFINITY) { |
857 | unsigned long hard = | 881 | unsigned long hard = |
858 | ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); | 882 | READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); |
859 | 883 | ||
860 | if (hard != RLIM_INFINITY && | 884 | if (hard != RLIM_INFINITY && |
861 | tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { | 885 | tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { |
@@ -882,14 +906,12 @@ static void check_thread_timers(struct task_struct *tsk, | |||
882 | } | 906 | } |
883 | } | 907 | } |
884 | 908 | ||
885 | static void stop_process_timers(struct signal_struct *sig) | 909 | static inline void stop_process_timers(struct signal_struct *sig) |
886 | { | 910 | { |
887 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 911 | struct thread_group_cputimer *cputimer = &sig->cputimer; |
888 | unsigned long flags; | ||
889 | 912 | ||
890 | raw_spin_lock_irqsave(&cputimer->lock, flags); | 913 | /* Turn off cputimer->running. This is done without locking. */ |
891 | cputimer->running = 0; | 914 | WRITE_ONCE(cputimer->running, 0); |
892 | raw_spin_unlock_irqrestore(&cputimer->lock, flags); | ||
893 | } | 915 | } |
894 | 916 | ||
895 | static u32 onecputick; | 917 | static u32 onecputick; |
@@ -958,11 +980,11 @@ static void check_process_timers(struct task_struct *tsk, | |||
958 | SIGPROF); | 980 | SIGPROF); |
959 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, | 981 | check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, |
960 | SIGVTALRM); | 982 | SIGVTALRM); |
961 | soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); | 983 | soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); |
962 | if (soft != RLIM_INFINITY) { | 984 | if (soft != RLIM_INFINITY) { |
963 | unsigned long psecs = cputime_to_secs(ptime); | 985 | unsigned long psecs = cputime_to_secs(ptime); |
964 | unsigned long hard = | 986 | unsigned long hard = |
965 | ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); | 987 | READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); |
966 | cputime_t x; | 988 | cputime_t x; |
967 | if (psecs >= hard) { | 989 | if (psecs >= hard) { |
968 | /* | 990 | /* |
@@ -1111,12 +1133,11 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1111 | } | 1133 | } |
1112 | 1134 | ||
1113 | sig = tsk->signal; | 1135 | sig = tsk->signal; |
1114 | if (sig->cputimer.running) { | 1136 | /* Check if cputimer is running. This is accessed without locking. */ |
1137 | if (READ_ONCE(sig->cputimer.running)) { | ||
1115 | struct task_cputime group_sample; | 1138 | struct task_cputime group_sample; |
1116 | 1139 | ||
1117 | raw_spin_lock(&sig->cputimer.lock); | 1140 | sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); |
1118 | group_sample = sig->cputimer.cputime; | ||
1119 | raw_spin_unlock(&sig->cputimer.lock); | ||
1120 | 1141 | ||
1121 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | 1142 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) |
1122 | return 1; | 1143 | return 1; |
@@ -1157,7 +1178,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1157 | * If there are any active process wide timers (POSIX 1.b, itimers, | 1178 | * If there are any active process wide timers (POSIX 1.b, itimers, |
1158 | * RLIMIT_CPU) cputimer must be running. | 1179 | * RLIMIT_CPU) cputimer must be running. |
1159 | */ | 1180 | */ |
1160 | if (tsk->signal->cputimer.running) | 1181 | if (READ_ONCE(tsk->signal->cputimer.running)) |
1161 | check_process_timers(tsk, &firing); | 1182 | check_process_timers(tsk, &firing); |
1162 | 1183 | ||
1163 | /* | 1184 | /* |
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 4f134d8907a7..f610b2a10b3e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c | |||
@@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, | |||
191 | /* Update distances based on topology */ | 191 | /* Update distances based on topology */ |
192 | for_each_cpu(cpu, update_mask) { | 192 | for_each_cpu(cpu, update_mask) { |
193 | if (cpu_rmap_copy_neigh(rmap, cpu, | 193 | if (cpu_rmap_copy_neigh(rmap, cpu, |
194 | topology_thread_cpumask(cpu), 1)) | 194 | topology_sibling_cpumask(cpu), 1)) |
195 | continue; | 195 | continue; |
196 | if (cpu_rmap_copy_neigh(rmap, cpu, | 196 | if (cpu_rmap_copy_neigh(rmap, cpu, |
197 | topology_core_cpumask(cpu), 2)) | 197 | topology_core_cpumask(cpu), 2)) |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3d2aa27b845b..061550de77bc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
35 | #include <linux/rcupdate.h> | 35 | #include <linux/rcupdate.h> |
36 | #include <linux/preempt_mask.h> /* in_interrupt() */ | 36 | #include <linux/preempt.h> /* in_interrupt() */ |
37 | 37 | ||
38 | 38 | ||
39 | /* | 39 | /* |
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index fe9a32591c24..3a5f2b366d84 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c | |||
@@ -85,7 +85,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, | |||
85 | * @str: The string to measure. | 85 | * @str: The string to measure. |
86 | * @count: Maximum count (including NUL character) | 86 | * @count: Maximum count (including NUL character) |
87 | * | 87 | * |
88 | * Context: User context only. This function may sleep. | 88 | * Context: User context only. This function may sleep if pagefaults are |
89 | * enabled. | ||
89 | * | 90 | * |
90 | * Get the size of a NUL-terminated string in user space. | 91 | * Get the size of a NUL-terminated string in user space. |
91 | * | 92 | * |
@@ -121,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user); | |||
121 | * strlen_user: - Get the size of a user string INCLUDING final NUL. | 122 | * strlen_user: - Get the size of a user string INCLUDING final NUL. |
122 | * @str: The string to measure. | 123 | * @str: The string to measure. |
123 | * | 124 | * |
124 | * Context: User context only. This function may sleep. | 125 | * Context: User context only. This function may sleep if pagefaults are |
126 | * enabled. | ||
125 | * | 127 | * |
126 | * Get the size of a NUL-terminated string in user space. | 128 | * Get the size of a NUL-terminated string in user space. |
127 | * | 129 | * |
diff --git a/mm/memory.c b/mm/memory.c index 22e037e3364e..17734c3c1183 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3737,7 +3737,7 @@ void print_vma_addr(char *prefix, unsigned long ip) | |||
3737 | } | 3737 | } |
3738 | 3738 | ||
3739 | #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) | 3739 | #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) |
3740 | void might_fault(void) | 3740 | void __might_fault(const char *file, int line) |
3741 | { | 3741 | { |
3742 | /* | 3742 | /* |
3743 | * Some code (nfs/sunrpc) uses socket ops on kernel memory while | 3743 | * Some code (nfs/sunrpc) uses socket ops on kernel memory while |
@@ -3747,21 +3747,15 @@ void might_fault(void) | |||
3747 | */ | 3747 | */ |
3748 | if (segment_eq(get_fs(), KERNEL_DS)) | 3748 | if (segment_eq(get_fs(), KERNEL_DS)) |
3749 | return; | 3749 | return; |
3750 | 3750 | if (pagefault_disabled()) | |
3751 | /* | ||
3752 | * it would be nicer only to annotate paths which are not under | ||
3753 | * pagefault_disable, however that requires a larger audit and | ||
3754 | * providing helpers like get_user_atomic. | ||
3755 | */ | ||
3756 | if (in_atomic()) | ||
3757 | return; | 3751 | return; |
3758 | 3752 | __might_sleep(file, line, 0); | |
3759 | __might_sleep(__FILE__, __LINE__, 0); | 3753 | #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) |
3760 | |||
3761 | if (current->mm) | 3754 | if (current->mm) |
3762 | might_lock_read(¤t->mm->mmap_sem); | 3755 | might_lock_read(¤t->mm->mmap_sem); |
3756 | #endif | ||
3763 | } | 3757 | } |
3764 | EXPORT_SYMBOL(might_fault); | 3758 | EXPORT_SYMBOL(__might_fault); |
3765 | #endif | 3759 | #endif |
3766 | 3760 | ||
3767 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) | 3761 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) |
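The mm/memory.c hunk belongs to the series that decouples page-fault disabling from the preemption counter: might_fault()'s debug check now consults an explicit pagefault_disabled() predicate rather than in_atomic(). A toy per-thread sketch of that idea in userspace C; the function names echo the kernel helpers, but this only illustrates the counting scheme and is not the kernel implementation:

    #include <stdbool.h>
    #include <stdio.h>

    /* Per-thread nesting count of pagefault_disable() sections, kept
     * separately from any preemption accounting. */
    static _Thread_local int pagefault_disable_count;

    static void pagefault_disable(void) { pagefault_disable_count++; }
    static void pagefault_enable(void)  { pagefault_disable_count--; }

    static bool pagefault_disabled(void)
    {
        return pagefault_disable_count != 0;
    }

    /*
     * Debug annotation in the spirit of __might_fault(): a user access
     * past this point may sleep to service a fault, unless faults have
     * been disabled, in which case the fault path must not sleep.
     */
    static void might_fault(void)
    {
        if (pagefault_disabled())
            return;
        printf("this access may sleep to service a page fault\n");
    }

    int main(void)
    {
        might_fault();          /* sleeping is allowed here */

        pagefault_disable();
        might_fault();          /* silent: faults must not sleep here */
        pagefault_enable();
        return 0;
    }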