Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/membarrier.c | 120
1 file changed, 112 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 678577267a9a..d2087d5f9837 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -27,7 +27,9 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK \
-        (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
+        (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
+        | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
+        | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
         | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
@@ -35,6 +37,73 @@ static void ipi_mb(void *info)
         smp_mb();       /* IPIs should be serializing but paranoid. */
 }
 
+static int membarrier_global_expedited(void)
+{
+        int cpu;
+        bool fallback = false;
+        cpumask_var_t tmpmask;
+
+        if (num_online_cpus() == 1)
+                return 0;
+
+        /*
+         * Matches memory barriers around rq->curr modification in
+         * scheduler.
+         */
+        smp_mb();       /* system call entry is not a mb. */
+
+        /*
+         * Expedited membarrier commands guarantee that they won't
+         * block, hence the GFP_NOWAIT allocation flag and fallback
+         * implementation.
+         */
+        if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
+                /* Fallback for OOM. */
+                fallback = true;
+        }
+
+        cpus_read_lock();
+        for_each_online_cpu(cpu) {
+                struct task_struct *p;
+
+                /*
+                 * Skipping the current CPU is OK even though we can be
+                 * migrated at any point. The current CPU, at the point
+                 * where we read raw_smp_processor_id(), is ensured to
+                 * be in program order with respect to the caller
+                 * thread. Therefore, we can skip this CPU from the
+                 * iteration.
+                 */
+                if (cpu == raw_smp_processor_id())
+                        continue;
+                rcu_read_lock();
+                p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+                if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
+                                   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
+                        if (!fallback)
+                                __cpumask_set_cpu(cpu, tmpmask);
+                        else
+                                smp_call_function_single(cpu, ipi_mb, NULL, 1);
+                }
+                rcu_read_unlock();
+        }
+        if (!fallback) {
+                preempt_disable();
+                smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+                preempt_enable();
+                free_cpumask_var(tmpmask);
+        }
+        cpus_read_unlock();
+
+        /*
+         * Memory barrier on the caller thread _after_ we finished
+         * waiting for the last IPI. Matches memory barriers around
+         * rq->curr modification in scheduler.
+         */
+        smp_mb();       /* exit from system call is not a mb */
+        return 0;
+}
+
 static int membarrier_private_expedited(void)
 {
         int cpu;
@@ -105,7 +174,38 @@ static int membarrier_private_expedited(void)
         return 0;
 }
 
-static void membarrier_register_private_expedited(void)
+static int membarrier_register_global_expedited(void)
+{
+        struct task_struct *p = current;
+        struct mm_struct *mm = p->mm;
+
+        if (atomic_read(&mm->membarrier_state) &
+            MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
+                return 0;
+        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
+        if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
+                /*
+                 * For single mm user, single threaded process, we can
+                 * simply issue a memory barrier after setting
+                 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
+                 * no memory access following registration is reordered
+                 * before registration.
+                 */
+                smp_mb();
+        } else {
+                /*
+                 * For multi-mm user threads, we need to ensure all
+                 * future scheduler executions will observe the new
+                 * thread flag state for this mm.
+                 */
+                synchronize_sched();
+        }
+        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
+                  &mm->membarrier_state);
+        return 0;
+}
+
+static int membarrier_register_private_expedited(void)
 {
         struct task_struct *p = current;
         struct mm_struct *mm = p->mm;
@@ -117,7 +217,7 @@ static void membarrier_register_private_expedited(void)
          */
         if (atomic_read(&mm->membarrier_state)
                         & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
-                return;
+                return 0;
         atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
         if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
                 /*
@@ -128,6 +228,7 @@ static void membarrier_register_private_expedited(void)
         }
         atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
                   &mm->membarrier_state);
+        return 0;
 }
 
 /**
@@ -167,21 +268,24 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
                 int cmd_mask = MEMBARRIER_CMD_BITMASK;
 
                 if (tick_nohz_full_enabled())
-                        cmd_mask &= ~MEMBARRIER_CMD_SHARED;
+                        cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
                 return cmd_mask;
         }
-        case MEMBARRIER_CMD_SHARED:
-                /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
+        case MEMBARRIER_CMD_GLOBAL:
+                /* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
                 if (tick_nohz_full_enabled())
                         return -EINVAL;
                 if (num_online_cpus() > 1)
                         synchronize_sched();
                 return 0;
+        case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
+                return membarrier_global_expedited();
+        case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
+                return membarrier_register_global_expedited();
         case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
                 return membarrier_private_expedited();
         case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
-                membarrier_register_private_expedited();
-                return 0;
+                return membarrier_register_private_expedited();
         default:
                 return -EINVAL;
         }
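
The new commands are driven entirely from userspace through the membarrier() system call. The sketch below is not part of the patch; it is a minimal illustration of the intended flow, assuming the matching MEMBARRIER_CMD_GLOBAL_EXPEDITED / MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED values are exported through the uapi <linux/membarrier.h> header from the same series, and that no libc wrapper exists, hence the raw syscall(2) invocation.

/* Hypothetical userspace usage sketch; not part of this patch. */
#include <linux/membarrier.h>   /* assumes the uapi header from this series */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
        return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
        int cmds = membarrier(MEMBARRIER_CMD_QUERY, 0);

        if (cmds < 0 || !(cmds & MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED)) {
                fprintf(stderr, "GLOBAL_EXPEDITED not supported\n");
                return 1;
        }

        /*
         * Register this process's mm as a target of global expedited
         * barriers (membarrier_register_global_expedited() above).
         */
        if (membarrier(MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0))
                return 1;

        /*
         * Any process may now issue this command; it IPIs only the CPUs
         * currently running threads of registered processes and returns
         * once each such CPU has executed a full memory barrier
         * (membarrier_global_expedited() above).
         */
        if (membarrier(MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0))
                return 1;

        return 0;
}

Registration is per-mm and one-way: once MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY is set, a repeated registration returns 0 immediately, which is why membarrier_register_global_expedited() checks the state before doing any work.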