-rw-r--r--  fs/exec.c                                                            |   2
-rw-r--r--  include/linux/mm_types.h                                             |  14
-rw-r--r--  include/linux/rcuwait.h                                              |  20
-rw-r--r--  include/linux/sched.h                                                |  10
-rw-r--r--  include/linux/sched/mm.h                                             |  10
-rw-r--r--  include/linux/sched/task.h                                           |   2
-rw-r--r--  kernel/exit.c                                                        |  74
-rw-r--r--  kernel/fork.c                                                        |   8
-rw-r--r--  kernel/sched/core.c                                                  |  28
-rw-r--r--  kernel/sched/fair.c                                                  |  39
-rw-r--r--  kernel/sched/membarrier.c                                            | 239
-rw-r--r--  kernel/sched/sched.h                                                 |  34
-rw-r--r--  tools/testing/selftests/membarrier/.gitignore                        |   3
-rw-r--r--  tools/testing/selftests/membarrier/Makefile                          |   5
-rw-r--r--  tools/testing/selftests/membarrier/membarrier_test_impl.h            |  40  (renamed from tools/testing/selftests/membarrier/membarrier_test.c)
-rw-r--r--  tools/testing/selftests/membarrier/membarrier_test_multi_thread.c    |  73
-rw-r--r--  tools/testing/selftests/membarrier/membarrier_test_single_thread.c   |  24

17 files changed, 375 insertions(+), 250 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index f7f6a140856a..555e93c7dec8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
1033 } 1033 }
1034 task_lock(tsk); 1034 task_lock(tsk);
1035 active_mm = tsk->active_mm; 1035 active_mm = tsk->active_mm;
1036 membarrier_exec_mmap(mm);
1036 tsk->mm = mm; 1037 tsk->mm = mm;
1037 tsk->active_mm = mm; 1038 tsk->active_mm = mm;
1038 activate_mm(active_mm, mm); 1039 activate_mm(active_mm, mm);
@@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
1825 /* execve succeeded */ 1826 /* execve succeeded */
1826 current->fs->in_exec = 0; 1827 current->fs->in_exec = 0;
1827 current->in_execve = 0; 1828 current->in_execve = 0;
1828 membarrier_execve(current);
1829 rseq_execve(current); 1829 rseq_execve(current);
1830 acct_update_integrals(current); 1830 acct_update_integrals(current);
1831 task_numa_free(current, false); 1831 task_numa_free(current, false);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5183e0d77dfa..2222fa795284 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -383,6 +383,16 @@ struct mm_struct {
383 unsigned long highest_vm_end; /* highest vma end address */ 383 unsigned long highest_vm_end; /* highest vma end address */
384 pgd_t * pgd; 384 pgd_t * pgd;
385 385
386#ifdef CONFIG_MEMBARRIER
387 /**
388 * @membarrier_state: Flags controlling membarrier behavior.
389 *
390 * This field is close to @pgd to hopefully fit in the same
391 * cache-line, which needs to be touched by switch_mm().
392 */
393 atomic_t membarrier_state;
394#endif
395
386 /** 396 /**
387 * @mm_users: The number of users including userspace. 397 * @mm_users: The number of users including userspace.
388 * 398 *
@@ -452,9 +462,7 @@ struct mm_struct {
452 unsigned long flags; /* Must use atomic bitops to access */ 462 unsigned long flags; /* Must use atomic bitops to access */
453 463
454 struct core_state *core_state; /* coredumping support */ 464 struct core_state *core_state; /* coredumping support */
455#ifdef CONFIG_MEMBARRIER 465
456 atomic_t membarrier_state;
457#endif
458#ifdef CONFIG_AIO 466#ifdef CONFIG_AIO
459 spinlock_t ioctx_lock; 467 spinlock_t ioctx_lock;
460 struct kioctx_table __rcu *ioctx_table; 468 struct kioctx_table __rcu *ioctx_table;
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
6 6
7/* 7/*
8 * rcuwait provides a way of blocking and waking up a single 8 * rcuwait provides a way of blocking and waking up a single
9 * task in an rcu-safe manner; where it is forbidden to use 9 * task in an rcu-safe manner.
10 * after exit_notify(). task_struct is not properly rcu protected,
11 * unless dealing with rcu-aware lists, ie: find_task_by_*().
12 * 10 *
13 * Alternatively we have task_rcu_dereference(), but the return 11 * The only time @task is non-nil is when a user is blocked (or
14 * semantics have different implications which would break the 12 * checking if it needs to) on a condition, and reset as soon as we
15 * wakeup side. The only time @task is non-nil is when a user is 13 * know that the condition has succeeded and are awoken.
16 * blocked (or checking if it needs to) on a condition, and reset
17 * as soon as we know that the condition has succeeded and are
18 * awoken.
19 */ 14 */
20struct rcuwait { 15struct rcuwait {
21 struct task_struct __rcu *task; 16 struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
37 */ 32 */
38#define rcuwait_wait_event(w, condition) \ 33#define rcuwait_wait_event(w, condition) \
39({ \ 34({ \
40 /* \
41 * Complain if we are called after do_exit()/exit_notify(), \
42 * as we cannot rely on the rcu critical region for the \
43 * wakeup side. \
44 */ \
45 WARN_ON(current->exit_state); \
46 \
47 rcu_assign_pointer((w)->task, current); \ 35 rcu_assign_pointer((w)->task, current); \
48 for (;;) { \ 36 for (;;) { \
49 /* \ 37 /* \
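With the exit_state WARN_ON gone, rcuwait no longer forbids use after exit_notify(); the wakeup side only relies on running inside an RCU read-side section. A minimal sketch of the waiter/waker pairing under these rules — my_waiter, my_done, my_wait and my_complete are made-up names for illustration, not part of this patch:

#include <linux/rcuwait.h>

static struct rcuwait my_waiter;	/* .task starts out NULL */
static bool my_done;

static void my_wait(void)
{
	/* Publishes current into my_waiter.task and sleeps until my_done. */
	rcuwait_wait_event(&my_waiter, READ_ONCE(my_done));
}

static void my_complete(void)
{
	WRITE_ONCE(my_done, true);
	/* Dereferences my_waiter.task under rcu_read_lock() and wakes it. */
	rcuwait_wake_up(&my_waiter);
}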
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70db597d6fd4..2c2e56bd8913 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1130,7 +1130,10 @@ struct task_struct {
1130 1130
1131 struct tlbflush_unmap_batch tlb_ubc; 1131 struct tlbflush_unmap_batch tlb_ubc;
1132 1132
1133 struct rcu_head rcu; 1133 union {
1134 refcount_t rcu_users;
1135 struct rcu_head rcu;
1136 };
1134 1137
1135 /* Cache last used pipe for splice(): */ 1138 /* Cache last used pipe for splice(): */
1136 struct pipe_inode_info *splice_pipe; 1139 struct pipe_inode_info *splice_pipe;
@@ -1839,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
1839 * running or not. 1842 * running or not.
1840 */ 1843 */
1841#ifndef vcpu_is_preempted 1844#ifndef vcpu_is_preempted
1842# define vcpu_is_preempted(cpu) false 1845static inline bool vcpu_is_preempted(int cpu)
1846{
1847 return false;
1848}
1843#endif 1849#endif
1844 1850
1845extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); 1851extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
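Two small changes land in sched.h: task_struct overlays the new rcu_users counter with the rcu_head (the head is only needed once the last rcu_users reference is gone), and the generic vcpu_is_preempted() fallback becomes a static inline so its argument is always evaluated and type-checked. The caller below is invented and the motivation is inferred rather than stated in the diff, but it shows the point: with the old object-like macro, a variable computed only for this check could trigger set-but-unused warnings.

static bool owner_runs_on_preempted_vcpu(struct task_struct *owner)
{
	int cpu = task_cpu(owner);	/* no longer "unused" with the inline fallback */

	return vcpu_is_preempted(cpu);
}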
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 4a7944078cc3..e6770012db18 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -362,16 +362,16 @@ enum {
362 362
363static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) 363static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
364{ 364{
365 if (current->mm != mm)
366 return;
365 if (likely(!(atomic_read(&mm->membarrier_state) & 367 if (likely(!(atomic_read(&mm->membarrier_state) &
366 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) 368 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
367 return; 369 return;
368 sync_core_before_usermode(); 370 sync_core_before_usermode();
369} 371}
370 372
371static inline void membarrier_execve(struct task_struct *t) 373extern void membarrier_exec_mmap(struct mm_struct *mm);
372{ 374
373 atomic_set(&t->mm->membarrier_state, 0);
374}
375#else 375#else
376#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS 376#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
377static inline void membarrier_arch_switch_mm(struct mm_struct *prev, 377static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
380{ 380{
381} 381}
382#endif 382#endif
383static inline void membarrier_execve(struct task_struct *t) 383static inline void membarrier_exec_mmap(struct mm_struct *mm)
384{ 384{
385} 385}
386static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) 386static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 3d90ed8f75f0..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
119 __put_task_struct(t); 119 __put_task_struct(t);
120} 120}
121 121
122struct task_struct *task_rcu_dereference(struct task_struct **ptask); 122void put_task_struct_rcu_user(struct task_struct *task);
123 123
124#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT 124#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
125extern int arch_task_struct_size __read_mostly; 125extern int arch_task_struct_size __read_mostly;
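put_task_struct_rcu_user() replaces the task_rcu_dereference() helper removed from kernel/exit.c below: instead of a racy revalidation dance, the task_struct is simply kept alive for an RCU grace period after the last rcu_users reference drops. A sketch of the reader pattern this enables, mirroring what kernel/sched/membarrier.c does in this series (do_something() and the surrounding context are illustrative only):

struct task_struct *p;

rcu_read_lock();
p = rcu_dereference(cpu_rq(cpu)->curr);	/* plain rcu_dereference() now suffices */
if (p)
	get_task_struct(p);	/* usage >= 1 is guaranteed until the grace period ends */
rcu_read_unlock();

if (p) {
	do_something(p);	/* hypothetical consumer, may run outside the RCU section */
	put_task_struct(p);
}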
diff --git a/kernel/exit.c b/kernel/exit.c
index 22ab6a4bdc51..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
182 put_task_struct(tsk); 182 put_task_struct(tsk);
183} 183}
184 184
185void put_task_struct_rcu_user(struct task_struct *task)
186{
187 if (refcount_dec_and_test(&task->rcu_users))
188 call_rcu(&task->rcu, delayed_put_task_struct);
189}
185 190
186void release_task(struct task_struct *p) 191void release_task(struct task_struct *p)
187{ 192{
@@ -222,76 +227,13 @@ repeat:
222 227
223 write_unlock_irq(&tasklist_lock); 228 write_unlock_irq(&tasklist_lock);
224 release_thread(p); 229 release_thread(p);
225 call_rcu(&p->rcu, delayed_put_task_struct); 230 put_task_struct_rcu_user(p);
226 231
227 p = leader; 232 p = leader;
228 if (unlikely(zap_leader)) 233 if (unlikely(zap_leader))
229 goto repeat; 234 goto repeat;
230} 235}
231 236
232/*
233 * Note that if this function returns a valid task_struct pointer (!NULL)
234 * task->usage must remain >0 for the duration of the RCU critical section.
235 */
236struct task_struct *task_rcu_dereference(struct task_struct **ptask)
237{
238 struct sighand_struct *sighand;
239 struct task_struct *task;
240
241 /*
242 * We need to verify that release_task() was not called and thus
243 * delayed_put_task_struct() can't run and drop the last reference
244 * before rcu_read_unlock(). We check task->sighand != NULL,
245 * but we can read the already freed and reused memory.
246 */
247retry:
248 task = rcu_dereference(*ptask);
249 if (!task)
250 return NULL;
251
252 probe_kernel_address(&task->sighand, sighand);
253
254 /*
255 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
256 * was already freed we can not miss the preceding update of this
257 * pointer.
258 */
259 smp_rmb();
260 if (unlikely(task != READ_ONCE(*ptask)))
261 goto retry;
262
263 /*
264 * We've re-checked that "task == *ptask", now we have two different
265 * cases:
266 *
267 * 1. This is actually the same task/task_struct. In this case
268 * sighand != NULL tells us it is still alive.
269 *
270 * 2. This is another task which got the same memory for task_struct.
271 * We can't know this of course, and we can not trust
272 * sighand != NULL.
273 *
274 * In this case we actually return a random value, but this is
275 * correct.
276 *
277 * If we return NULL - we can pretend that we actually noticed that
278 * *ptask was updated when the previous task has exited. Or pretend
279 * that probe_slab_address(&sighand) reads NULL.
280 *
281 * If we return the new task (because sighand is not NULL for any
282 * reason) - this is fine too. This (new) task can't go away before
283 * another gp pass.
284 *
285 * And note: We could even eliminate the false positive if re-read
286 * task->sighand once again to avoid the falsely NULL. But this case
287 * is very unlikely so we don't care.
288 */
289 if (!sighand)
290 return NULL;
291
292 return task;
293}
294
295void rcuwait_wake_up(struct rcuwait *w) 237void rcuwait_wake_up(struct rcuwait *w)
296{ 238{
297 struct task_struct *task; 239 struct task_struct *task;
@@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
311 */ 253 */
312 smp_mb(); /* (B) */ 254 smp_mb(); /* (B) */
313 255
314 /*
315 * Avoid using task_rcu_dereference() magic as long as we are careful,
316 * see comment in rcuwait_wait_event() regarding ->exit_state.
317 */
318 task = rcu_dereference(w->task); 256 task = rcu_dereference(w->task);
319 if (task) 257 if (task)
320 wake_up_process(task); 258 wake_up_process(task);
diff --git a/kernel/fork.c b/kernel/fork.c
index 60763c043aa3..f9572f416126 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -915,10 +915,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
915 tsk->cpus_ptr = &tsk->cpus_mask; 915 tsk->cpus_ptr = &tsk->cpus_mask;
916 916
917 /* 917 /*
918 * One for us, one for whoever does the "release_task()" (usually 918 * One for the user space visible state that goes away when reaped.
919 * parent) 919 * One for the scheduler.
920 */ 920 */
921 refcount_set(&tsk->usage, 2); 921 refcount_set(&tsk->rcu_users, 2);
922 /* One for the rcu users */
923 refcount_set(&tsk->usage, 1);
922#ifdef CONFIG_BLK_DEV_IO_TRACE 924#ifdef CONFIG_BLK_DEV_IO_TRACE
923 tsk->btrace_seq = 0; 925 tsk->btrace_seq = 0;
924#endif 926#endif
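The two counters set up here split the task lifetime in two: rcu_users counts the references that may still reach the task through RCU (the scheduler's rq->curr reference and the one dropped by release_task()), while usage keeps the struct itself alive. A rough timeline, assembled from this series rather than quoted from any single file:

/*
 * dup_task_struct():       rcu_users = 2, usage = 1
 * finish_task_switch():    put_task_struct_rcu_user()   -- scheduler's reference
 * release_task():          put_task_struct_rcu_user()   -- reaper's reference
 * rcu_users reaches 0:     call_rcu(&tsk->rcu, delayed_put_task_struct)
 * after a grace period:    put_task_struct() drops the final usage reference
 */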
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f9a1346a5fa9..7880f4f64d0e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1656,7 +1656,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
1656 if (cpumask_equal(p->cpus_ptr, new_mask)) 1656 if (cpumask_equal(p->cpus_ptr, new_mask))
1657 goto out; 1657 goto out;
1658 1658
1659 if (!cpumask_intersects(new_mask, cpu_valid_mask)) { 1659 dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
1660 if (dest_cpu >= nr_cpu_ids) {
1660 ret = -EINVAL; 1661 ret = -EINVAL;
1661 goto out; 1662 goto out;
1662 } 1663 }
@@ -1677,7 +1678,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
1677 if (cpumask_test_cpu(task_cpu(p), new_mask)) 1678 if (cpumask_test_cpu(task_cpu(p), new_mask))
1678 goto out; 1679 goto out;
1679 1680
1680 dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
1681 if (task_running(rq, p) || p->state == TASK_WAKING) { 1681 if (task_running(rq, p) || p->state == TASK_WAKING) {
1682 struct migration_arg arg = { p, dest_cpu }; 1682 struct migration_arg arg = { p, dest_cpu };
1683 /* Need help from migration thread: drop lock and wait. */ 1683 /* Need help from migration thread: drop lock and wait. */
@@ -3254,7 +3254,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
3254 /* Task is done with its stack. */ 3254 /* Task is done with its stack. */
3255 put_task_stack(prev); 3255 put_task_stack(prev);
3256 3256
3257 put_task_struct(prev); 3257 put_task_struct_rcu_user(prev);
3258 } 3258 }
3259 3259
3260 tick_nohz_task_switch(); 3260 tick_nohz_task_switch();
@@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
3358 else 3358 else
3359 prev->active_mm = NULL; 3359 prev->active_mm = NULL;
3360 } else { // to user 3360 } else { // to user
3361 membarrier_switch_mm(rq, prev->active_mm, next->mm);
3361 /* 3362 /*
3362 * sys_membarrier() requires an smp_mb() between setting 3363 * sys_membarrier() requires an smp_mb() between setting
3363 * rq->curr and returning to userspace. 3364 * rq->curr / membarrier_switch_mm() and returning to userspace.
3364 * 3365 *
3365 * The below provides this either through switch_mm(), or in 3366 * The below provides this either through switch_mm(), or in
3366 * case 'prev->active_mm == next->mm' through 3367 * case 'prev->active_mm == next->mm' through
3367 * finish_task_switch()'s mmdrop(). 3368 * finish_task_switch()'s mmdrop().
3368 */ 3369 */
3369
3370 switch_mm_irqs_off(prev->active_mm, next->mm, next); 3370 switch_mm_irqs_off(prev->active_mm, next->mm, next);
3371 3371
3372 if (!prev->mm) { // from kernel 3372 if (!prev->mm) { // from kernel
@@ -4042,7 +4042,11 @@ static void __sched notrace __schedule(bool preempt)
4042 4042
4043 if (likely(prev != next)) { 4043 if (likely(prev != next)) {
4044 rq->nr_switches++; 4044 rq->nr_switches++;
4045 rq->curr = next; 4045 /*
4046 * RCU users of rcu_dereference(rq->curr) may not see
4047 * changes to task_struct made by pick_next_task().
4048 */
4049 RCU_INIT_POINTER(rq->curr, next);
4046 /* 4050 /*
4047 * The membarrier system call requires each architecture 4051 * The membarrier system call requires each architecture
4048 * to have a full memory barrier after updating 4052 * to have a full memory barrier after updating
@@ -4223,9 +4227,8 @@ static void __sched notrace preempt_schedule_common(void)
4223 4227
4224#ifdef CONFIG_PREEMPTION 4228#ifdef CONFIG_PREEMPTION
4225/* 4229/*
4226 * this is the entry point to schedule() from in-kernel preemption 4230 * This is the entry point to schedule() from in-kernel preemption
4227 * off of preempt_enable. Kernel preemptions off return from interrupt 4231 * off of preempt_enable.
4228 * occur there and call schedule directly.
4229 */ 4232 */
4230asmlinkage __visible void __sched notrace preempt_schedule(void) 4233asmlinkage __visible void __sched notrace preempt_schedule(void)
4231{ 4234{
@@ -4296,7 +4299,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
4296#endif /* CONFIG_PREEMPTION */ 4299#endif /* CONFIG_PREEMPTION */
4297 4300
4298/* 4301/*
4299 * this is the entry point to schedule() from kernel preemption 4302 * This is the entry point to schedule() from kernel preemption
4300 * off of irq context. 4303 * off of irq context.
4301 * Note, that this is called and return with irqs disabled. This will 4304 * Note, that this is called and return with irqs disabled. This will
4302 * protect us against recursive calling from irq. 4305 * protect us against recursive calling from irq.
@@ -6069,7 +6072,8 @@ void init_idle(struct task_struct *idle, int cpu)
6069 __set_task_cpu(idle, cpu); 6072 __set_task_cpu(idle, cpu);
6070 rcu_read_unlock(); 6073 rcu_read_unlock();
6071 6074
6072 rq->curr = rq->idle = idle; 6075 rq->idle = idle;
6076 rcu_assign_pointer(rq->curr, idle);
6073 idle->on_rq = TASK_ON_RQ_QUEUED; 6077 idle->on_rq = TASK_ON_RQ_QUEUED;
6074#ifdef CONFIG_SMP 6078#ifdef CONFIG_SMP
6075 idle->on_cpu = 1; 6079 idle->on_cpu = 1;
@@ -6430,8 +6434,6 @@ int sched_cpu_activate(unsigned int cpu)
6430 } 6434 }
6431 rq_unlock_irqrestore(rq, &rf); 6435 rq_unlock_irqrestore(rq, &rf);
6432 6436
6433 update_max_interval();
6434
6435 return 0; 6437 return 0;
6436} 6438}
6437 6439
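The __set_cpus_allowed_ptr() hunk above folds the validity check and the destination pick into a single cpumask_any_and() call, which returns a value >= nr_cpu_ids when the two masks do not intersect. A minimal illustration of that idiom (requested_mask is a placeholder name, not from the patch):

unsigned int cpu;

cpu = cpumask_any_and(cpu_online_mask, requested_mask);
if (cpu >= nr_cpu_ids)
	return -EINVAL;	/* no online CPU left in requested_mask */
/* otherwise cpu is a valid destination present in both masks */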
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d4bbf68c3161..83ab35e2374f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -749,7 +749,6 @@ void init_entity_runnable_average(struct sched_entity *se)
749 /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ 749 /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
750} 750}
751 751
752static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
753static void attach_entity_cfs_rq(struct sched_entity *se); 752static void attach_entity_cfs_rq(struct sched_entity *se);
754 753
755/* 754/*
@@ -1603,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
1603 return; 1602 return;
1604 1603
1605 rcu_read_lock(); 1604 rcu_read_lock();
1606 cur = task_rcu_dereference(&dst_rq->curr); 1605 cur = rcu_dereference(dst_rq->curr);
1607 if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) 1606 if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
1608 cur = NULL; 1607 cur = NULL;
1609 1608
@@ -4354,21 +4353,16 @@ static inline u64 sched_cfs_bandwidth_slice(void)
4354} 4353}
4355 4354
4356/* 4355/*
4357 * Replenish runtime according to assigned quota and update expiration time. 4356 * Replenish runtime according to assigned quota. We use sched_clock_cpu
4358 * We use sched_clock_cpu directly instead of rq->clock to avoid adding 4357 * directly instead of rq->clock to avoid adding additional synchronization
4359 * additional synchronization around rq->lock. 4358 * around rq->lock.
4360 * 4359 *
4361 * requires cfs_b->lock 4360 * requires cfs_b->lock
4362 */ 4361 */
4363void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) 4362void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
4364{ 4363{
4365 u64 now; 4364 if (cfs_b->quota != RUNTIME_INF)
4366 4365 cfs_b->runtime = cfs_b->quota;
4367 if (cfs_b->quota == RUNTIME_INF)
4368 return;
4369
4370 now = sched_clock_cpu(smp_processor_id());
4371 cfs_b->runtime = cfs_b->quota;
4372} 4366}
4373 4367
4374static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) 4368static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4376,15 +4370,6 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
4376 return &tg->cfs_bandwidth; 4370 return &tg->cfs_bandwidth;
4377} 4371}
4378 4372
4379/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
4380static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
4381{
4382 if (unlikely(cfs_rq->throttle_count))
4383 return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
4384
4385 return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
4386}
4387
4388/* returns 0 on failure to allocate runtime */ 4373/* returns 0 on failure to allocate runtime */
4389static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) 4374static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4390{ 4375{
@@ -4476,7 +4461,6 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
4476 4461
4477 cfs_rq->throttle_count--; 4462 cfs_rq->throttle_count--;
4478 if (!cfs_rq->throttle_count) { 4463 if (!cfs_rq->throttle_count) {
4479 /* adjust cfs_rq_clock_task() */
4480 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) - 4464 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
4481 cfs_rq->throttled_clock_task; 4465 cfs_rq->throttled_clock_task;
4482 4466
@@ -4994,15 +4978,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4994 4978
4995void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) 4979void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
4996{ 4980{
4997 u64 overrun;
4998
4999 lockdep_assert_held(&cfs_b->lock); 4981 lockdep_assert_held(&cfs_b->lock);
5000 4982
5001 if (cfs_b->period_active) 4983 if (cfs_b->period_active)
5002 return; 4984 return;
5003 4985
5004 cfs_b->period_active = 1; 4986 cfs_b->period_active = 1;
5005 overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); 4987 hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
5006 hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); 4988 hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
5007} 4989}
5008 4990
@@ -5080,11 +5062,6 @@ static inline bool cfs_bandwidth_used(void)
5080 return false; 5062 return false;
5081} 5063}
5082 5064
5083static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
5084{
5085 return rq_clock_task(rq_of(cfs_rq));
5086}
5087
5088static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {} 5065static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
5089static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; } 5066static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
5090static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} 5067static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
@@ -6412,7 +6389,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
6412 } 6389 }
6413 6390
6414 /* Evaluate the energy impact of using this CPU. */ 6391 /* Evaluate the energy impact of using this CPU. */
6415 if (max_spare_cap_cpu >= 0) { 6392 if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
6416 cur_delta = compute_energy(p, max_spare_cap_cpu, pd); 6393 cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
6417 cur_delta -= base_energy_pd; 6394 cur_delta -= base_energy_pd;
6418 if (cur_delta < best_delta) { 6395 if (cur_delta < best_delta) {
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..a39bed2c784f 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -30,10 +30,42 @@ static void ipi_mb(void *info)
30 smp_mb(); /* IPIs should be serializing but paranoid. */ 30 smp_mb(); /* IPIs should be serializing but paranoid. */
31} 31}
32 32
33static void ipi_sync_rq_state(void *info)
34{
35 struct mm_struct *mm = (struct mm_struct *) info;
36
37 if (current->mm != mm)
38 return;
39 this_cpu_write(runqueues.membarrier_state,
40 atomic_read(&mm->membarrier_state));
41 /*
42 * Issue a memory barrier after setting
43 * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
44 * guarantee that no memory access following registration is reordered
45 * before registration.
46 */
47 smp_mb();
48}
49
50void membarrier_exec_mmap(struct mm_struct *mm)
51{
52 /*
53 * Issue a memory barrier before clearing membarrier_state to
54 * guarantee that no memory access prior to exec is reordered after
55 * clearing this state.
56 */
57 smp_mb();
58 atomic_set(&mm->membarrier_state, 0);
59 /*
60 * Keep the runqueue membarrier_state in sync with this mm
61 * membarrier_state.
62 */
63 this_cpu_write(runqueues.membarrier_state, 0);
64}
65
33static int membarrier_global_expedited(void) 66static int membarrier_global_expedited(void)
34{ 67{
35 int cpu; 68 int cpu;
36 bool fallback = false;
37 cpumask_var_t tmpmask; 69 cpumask_var_t tmpmask;
38 70
39 if (num_online_cpus() == 1) 71 if (num_online_cpus() == 1)
@@ -45,17 +77,11 @@ static int membarrier_global_expedited(void)
45 */ 77 */
46 smp_mb(); /* system call entry is not a mb. */ 78 smp_mb(); /* system call entry is not a mb. */
47 79
48 /* 80 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
49 * Expedited membarrier commands guarantee that they won't 81 return -ENOMEM;
50 * block, hence the GFP_NOWAIT allocation flag and fallback
51 * implementation.
52 */
53 if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
54 /* Fallback for OOM. */
55 fallback = true;
56 }
57 82
58 cpus_read_lock(); 83 cpus_read_lock();
84 rcu_read_lock();
59 for_each_online_cpu(cpu) { 85 for_each_online_cpu(cpu) {
60 struct task_struct *p; 86 struct task_struct *p;
61 87
@@ -70,23 +96,28 @@ static int membarrier_global_expedited(void)
70 if (cpu == raw_smp_processor_id()) 96 if (cpu == raw_smp_processor_id())
71 continue; 97 continue;
72 98
73 rcu_read_lock(); 99 if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
74 p = task_rcu_dereference(&cpu_rq(cpu)->curr); 100 MEMBARRIER_STATE_GLOBAL_EXPEDITED))
75 if (p && p->mm && (atomic_read(&p->mm->membarrier_state) & 101 continue;
76 MEMBARRIER_STATE_GLOBAL_EXPEDITED)) { 102
77 if (!fallback) 103 /*
78 __cpumask_set_cpu(cpu, tmpmask); 104 * Skip the CPU if it runs a kernel thread. The scheduler
79 else 105 * leaves the prior task mm in place as an optimization when
80 smp_call_function_single(cpu, ipi_mb, NULL, 1); 106 * scheduling a kthread.
81 } 107 */
82 rcu_read_unlock(); 108 p = rcu_dereference(cpu_rq(cpu)->curr);
83 } 109 if (p->flags & PF_KTHREAD)
84 if (!fallback) { 110 continue;
85 preempt_disable(); 111
86 smp_call_function_many(tmpmask, ipi_mb, NULL, 1); 112 __cpumask_set_cpu(cpu, tmpmask);
87 preempt_enable();
88 free_cpumask_var(tmpmask);
89 } 113 }
114 rcu_read_unlock();
115
116 preempt_disable();
117 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
118 preempt_enable();
119
120 free_cpumask_var(tmpmask);
90 cpus_read_unlock(); 121 cpus_read_unlock();
91 122
92 /* 123 /*
@@ -101,22 +132,22 @@ static int membarrier_global_expedited(void)
101static int membarrier_private_expedited(int flags) 132static int membarrier_private_expedited(int flags)
102{ 133{
103 int cpu; 134 int cpu;
104 bool fallback = false;
105 cpumask_var_t tmpmask; 135 cpumask_var_t tmpmask;
136 struct mm_struct *mm = current->mm;
106 137
107 if (flags & MEMBARRIER_FLAG_SYNC_CORE) { 138 if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
108 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE)) 139 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
109 return -EINVAL; 140 return -EINVAL;
110 if (!(atomic_read(&current->mm->membarrier_state) & 141 if (!(atomic_read(&mm->membarrier_state) &
111 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY)) 142 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
112 return -EPERM; 143 return -EPERM;
113 } else { 144 } else {
114 if (!(atomic_read(&current->mm->membarrier_state) & 145 if (!(atomic_read(&mm->membarrier_state) &
115 MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) 146 MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
116 return -EPERM; 147 return -EPERM;
117 } 148 }
118 149
119 if (num_online_cpus() == 1) 150 if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
120 return 0; 151 return 0;
121 152
122 /* 153 /*
@@ -125,17 +156,11 @@ static int membarrier_private_expedited(int flags)
125 */ 156 */
126 smp_mb(); /* system call entry is not a mb. */ 157 smp_mb(); /* system call entry is not a mb. */
127 158
128 /* 159 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
129 * Expedited membarrier commands guarantee that they won't 160 return -ENOMEM;
130 * block, hence the GFP_NOWAIT allocation flag and fallback
131 * implementation.
132 */
133 if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
134 /* Fallback for OOM. */
135 fallback = true;
136 }
137 161
138 cpus_read_lock(); 162 cpus_read_lock();
163 rcu_read_lock();
139 for_each_online_cpu(cpu) { 164 for_each_online_cpu(cpu) {
140 struct task_struct *p; 165 struct task_struct *p;
141 166
@@ -150,21 +175,17 @@ static int membarrier_private_expedited(int flags)
150 if (cpu == raw_smp_processor_id()) 175 if (cpu == raw_smp_processor_id())
151 continue; 176 continue;
152 rcu_read_lock(); 177 rcu_read_lock();
153 p = task_rcu_dereference(&cpu_rq(cpu)->curr); 178 p = rcu_dereference(cpu_rq(cpu)->curr);
154 if (p && p->mm == current->mm) { 179 if (p && p->mm == mm)
155 if (!fallback) 180 __cpumask_set_cpu(cpu, tmpmask);
156 __cpumask_set_cpu(cpu, tmpmask);
157 else
158 smp_call_function_single(cpu, ipi_mb, NULL, 1);
159 }
160 rcu_read_unlock();
161 }
162 if (!fallback) {
163 preempt_disable();
164 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
165 preempt_enable();
166 free_cpumask_var(tmpmask);
167 } 181 }
182 rcu_read_unlock();
183
184 preempt_disable();
185 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
186 preempt_enable();
187
188 free_cpumask_var(tmpmask);
168 cpus_read_unlock(); 189 cpus_read_unlock();
169 190
170 /* 191 /*
@@ -177,32 +198,78 @@ static int membarrier_private_expedited(int flags)
177 return 0; 198 return 0;
178} 199}
179 200
201static int sync_runqueues_membarrier_state(struct mm_struct *mm)
202{
203 int membarrier_state = atomic_read(&mm->membarrier_state);
204 cpumask_var_t tmpmask;
205 int cpu;
206
207 if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
208 this_cpu_write(runqueues.membarrier_state, membarrier_state);
209
210 /*
211 * For single mm user, we can simply issue a memory barrier
212 * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
213 * mm and in the current runqueue to guarantee that no memory
214 * access following registration is reordered before
215 * registration.
216 */
217 smp_mb();
218 return 0;
219 }
220
221 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
222 return -ENOMEM;
223
224 /*
225 * For mm with multiple users, we need to ensure all future
226 * scheduler executions will observe @mm's new membarrier
227 * state.
228 */
229 synchronize_rcu();
230
231 /*
232 * For each cpu runqueue, if the task's mm match @mm, ensure that all
233 * @mm's membarrier state set bits are also set in in the runqueue's
234 * membarrier state. This ensures that a runqueue scheduling
235 * between threads which are users of @mm has its membarrier state
236 * updated.
237 */
238 cpus_read_lock();
239 rcu_read_lock();
240 for_each_online_cpu(cpu) {
241 struct rq *rq = cpu_rq(cpu);
242 struct task_struct *p;
243
244 p = rcu_dereference(rq->curr);
245 if (p && p->mm == mm)
246 __cpumask_set_cpu(cpu, tmpmask);
247 }
248 rcu_read_unlock();
249
250 preempt_disable();
251 smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
252 preempt_enable();
253
254 free_cpumask_var(tmpmask);
255 cpus_read_unlock();
256
257 return 0;
258}
259
180static int membarrier_register_global_expedited(void) 260static int membarrier_register_global_expedited(void)
181{ 261{
182 struct task_struct *p = current; 262 struct task_struct *p = current;
183 struct mm_struct *mm = p->mm; 263 struct mm_struct *mm = p->mm;
264 int ret;
184 265
185 if (atomic_read(&mm->membarrier_state) & 266 if (atomic_read(&mm->membarrier_state) &
186 MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY) 267 MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
187 return 0; 268 return 0;
188 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state); 269 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
189 if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) { 270 ret = sync_runqueues_membarrier_state(mm);
190 /* 271 if (ret)
191 * For single mm user, single threaded process, we can 272 return ret;
192 * simply issue a memory barrier after setting
193 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
194 * no memory access following registration is reordered
195 * before registration.
196 */
197 smp_mb();
198 } else {
199 /*
200 * For multi-mm user threads, we need to ensure all
201 * future scheduler executions will observe the new
202 * thread flag state for this mm.
203 */
204 synchronize_rcu();
205 }
206 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, 273 atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
207 &mm->membarrier_state); 274 &mm->membarrier_state);
208 275
@@ -213,12 +280,15 @@ static int membarrier_register_private_expedited(int flags)
213{ 280{
214 struct task_struct *p = current; 281 struct task_struct *p = current;
215 struct mm_struct *mm = p->mm; 282 struct mm_struct *mm = p->mm;
216 int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY; 283 int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
284 set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
285 ret;
217 286
218 if (flags & MEMBARRIER_FLAG_SYNC_CORE) { 287 if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
219 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE)) 288 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
220 return -EINVAL; 289 return -EINVAL;
221 state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY; 290 ready_state =
291 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
222 } 292 }
223 293
224 /* 294 /*
@@ -226,20 +296,15 @@ static int membarrier_register_private_expedited(int flags)
226 * groups, which use the same mm. (CLONE_VM but not 296 * groups, which use the same mm. (CLONE_VM but not
227 * CLONE_THREAD). 297 * CLONE_THREAD).
228 */ 298 */
229 if (atomic_read(&mm->membarrier_state) & state) 299 if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
230 return 0; 300 return 0;
231 atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
232 if (flags & MEMBARRIER_FLAG_SYNC_CORE) 301 if (flags & MEMBARRIER_FLAG_SYNC_CORE)
233 atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE, 302 set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
234 &mm->membarrier_state); 303 atomic_or(set_state, &mm->membarrier_state);
235 if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) { 304 ret = sync_runqueues_membarrier_state(mm);
236 /* 305 if (ret)
237 * Ensure all future scheduler executions will observe the 306 return ret;
238 * new thread flag state for this process. 307 atomic_or(ready_state, &mm->membarrier_state);
239 */
240 synchronize_rcu();
241 }
242 atomic_or(state, &mm->membarrier_state);
243 308
244 return 0; 309 return 0;
245} 310}
@@ -253,8 +318,10 @@ static int membarrier_register_private_expedited(int flags)
253 * command specified does not exist, not available on the running 318 * command specified does not exist, not available on the running
254 * kernel, or if the command argument is invalid, this system call 319 * kernel, or if the command argument is invalid, this system call
255 * returns -EINVAL. For a given command, with flags argument set to 0, 320 * returns -EINVAL. For a given command, with flags argument set to 0,
256 * this system call is guaranteed to always return the same value until 321 * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
257 * reboot. 322 * always return the same value until reboot. In addition, it can return
323 * -ENOMEM if there is not enough memory available to perform the system
324 * call.
258 * 325 *
259 * All memory accesses performed in program order from each targeted thread 326 * All memory accesses performed in program order from each targeted thread
260 * is guaranteed to be ordered with respect to sys_membarrier(). If we use 327 * is guaranteed to be ordered with respect to sys_membarrier(). If we use
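Because the expedited paths now allocate their cpumask with GFP_KERNEL instead of falling back to per-CPU IPIs on allocation failure, both the registration and the barrier commands can return -ENOMEM. A user-space sketch of the resulting call sequence (illustrative, not taken from the selftests):

#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* Registration may now also fail with ENOMEM. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
		perror("MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED");
		return 1;
	}
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
		if (errno == ENOMEM)
			fprintf(stderr, "cpumask allocation failed, retry later\n");
		else
			perror("MEMBARRIER_CMD_PRIVATE_EXPEDITED");
		return 1;
	}
	return 0;
}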
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3cb895d14a2..0db2c1b3361e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -911,6 +911,10 @@ struct rq {
911 911
912 atomic_t nr_iowait; 912 atomic_t nr_iowait;
913 913
914#ifdef CONFIG_MEMBARRIER
915 int membarrier_state;
916#endif
917
914#ifdef CONFIG_SMP 918#ifdef CONFIG_SMP
915 struct root_domain *rd; 919 struct root_domain *rd;
916 struct sched_domain __rcu *sd; 920 struct sched_domain __rcu *sd;
@@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
2438static inline bool sched_energy_enabled(void) { return false; } 2442static inline bool sched_energy_enabled(void) { return false; }
2439 2443
2440#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ 2444#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
2445
2446#ifdef CONFIG_MEMBARRIER
2447/*
2448 * The scheduler provides memory barriers required by membarrier between:
2449 * - prior user-space memory accesses and store to rq->membarrier_state,
2450 * - store to rq->membarrier_state and following user-space memory accesses.
2451 * In the same way it provides those guarantees around store to rq->curr.
2452 */
2453static inline void membarrier_switch_mm(struct rq *rq,
2454 struct mm_struct *prev_mm,
2455 struct mm_struct *next_mm)
2456{
2457 int membarrier_state;
2458
2459 if (prev_mm == next_mm)
2460 return;
2461
2462 membarrier_state = atomic_read(&next_mm->membarrier_state);
2463 if (READ_ONCE(rq->membarrier_state) == membarrier_state)
2464 return;
2465
2466 WRITE_ONCE(rq->membarrier_state, membarrier_state);
2467}
2468#else
2469static inline void membarrier_switch_mm(struct rq *rq,
2470 struct mm_struct *prev_mm,
2471 struct mm_struct *next_mm)
2472{
2473}
2474#endif
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
index 020c44f49a9e..f2f7ec0a99b4 100644
--- a/tools/testing/selftests/membarrier/.gitignore
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -1 +1,2 @@
1membarrier_test 1membarrier_test_multi_thread
2membarrier_test_single_thread
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index 97e3bdf3d1e9..34d1c81a2324 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,7 +1,8 @@
1# SPDX-License-Identifier: GPL-2.0-only 1# SPDX-License-Identifier: GPL-2.0-only
2CFLAGS += -g -I../../../../usr/include/ 2CFLAGS += -g -I../../../../usr/include/
3LDLIBS += -lpthread
3 4
4TEST_GEN_PROGS := membarrier_test 5TEST_GEN_PROGS := membarrier_test_single_thread \
6 membarrier_test_multi_thread
5 7
6include ../lib.mk 8include ../lib.mk
7
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index 70b4ddbf126b..186be69f0a59 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -1,10 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0 1/* SPDX-License-Identifier: GPL-2.0 */
2#define _GNU_SOURCE 2#define _GNU_SOURCE
3#include <linux/membarrier.h> 3#include <linux/membarrier.h>
4#include <syscall.h> 4#include <syscall.h>
5#include <stdio.h> 5#include <stdio.h>
6#include <errno.h> 6#include <errno.h>
7#include <string.h> 7#include <string.h>
8#include <pthread.h>
8 9
9#include "../kselftest.h" 10#include "../kselftest.h"
10 11
@@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void)
223 return 0; 224 return 0;
224} 225}
225 226
226static int test_membarrier(void) 227static int test_membarrier_fail(void)
227{ 228{
228 int status; 229 int status;
229 230
@@ -233,10 +234,27 @@ static int test_membarrier(void)
233 status = test_membarrier_flags_fail(); 234 status = test_membarrier_flags_fail();
234 if (status) 235 if (status)
235 return status; 236 return status;
236 status = test_membarrier_global_success(); 237 status = test_membarrier_private_expedited_fail();
237 if (status) 238 if (status)
238 return status; 239 return status;
239 status = test_membarrier_private_expedited_fail(); 240 status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
241 if (status < 0) {
242 ksft_test_result_fail("sys_membarrier() failed\n");
243 return status;
244 }
245 if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
246 status = test_membarrier_private_expedited_sync_core_fail();
247 if (status)
248 return status;
249 }
250 return 0;
251}
252
253static int test_membarrier_success(void)
254{
255 int status;
256
257 status = test_membarrier_global_success();
240 if (status) 258 if (status)
241 return status; 259 return status;
242 status = test_membarrier_register_private_expedited_success(); 260 status = test_membarrier_register_private_expedited_success();
@@ -251,9 +269,6 @@ static int test_membarrier(void)
251 return status; 269 return status;
252 } 270 }
253 if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { 271 if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
254 status = test_membarrier_private_expedited_sync_core_fail();
255 if (status)
256 return status;
257 status = test_membarrier_register_private_expedited_sync_core_success(); 272 status = test_membarrier_register_private_expedited_sync_core_success();
258 if (status) 273 if (status)
259 return status; 274 return status;
@@ -300,14 +315,3 @@ static int test_membarrier_query(void)
300 ksft_test_result_pass("sys_membarrier available\n"); 315 ksft_test_result_pass("sys_membarrier available\n");
301 return 0; 316 return 0;
302} 317}
303
304int main(int argc, char **argv)
305{
306 ksft_print_header();
307 ksft_set_plan(13);
308
309 test_membarrier_query();
310 test_membarrier();
311
312 return ksft_exit_pass();
313}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
new file mode 100644
index 000000000000..ac5613e5b0eb
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -0,0 +1,73 @@
1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <linux/membarrier.h>
4#include <syscall.h>
5#include <stdio.h>
6#include <errno.h>
7#include <string.h>
8#include <pthread.h>
9
10#include "membarrier_test_impl.h"
11
12static int thread_ready, thread_quit;
13static pthread_mutex_t test_membarrier_thread_mutex =
14 PTHREAD_MUTEX_INITIALIZER;
15static pthread_cond_t test_membarrier_thread_cond =
16 PTHREAD_COND_INITIALIZER;
17
18void *test_membarrier_thread(void *arg)
19{
20 pthread_mutex_lock(&test_membarrier_thread_mutex);
21 thread_ready = 1;
22 pthread_cond_broadcast(&test_membarrier_thread_cond);
23 pthread_mutex_unlock(&test_membarrier_thread_mutex);
24
25 pthread_mutex_lock(&test_membarrier_thread_mutex);
26 while (!thread_quit)
27 pthread_cond_wait(&test_membarrier_thread_cond,
28 &test_membarrier_thread_mutex);
29 pthread_mutex_unlock(&test_membarrier_thread_mutex);
30
31 return NULL;
32}
33
34static int test_mt_membarrier(void)
35{
36 int i;
37 pthread_t test_thread;
38
39 pthread_create(&test_thread, NULL,
40 test_membarrier_thread, NULL);
41
42 pthread_mutex_lock(&test_membarrier_thread_mutex);
43 while (!thread_ready)
44 pthread_cond_wait(&test_membarrier_thread_cond,
45 &test_membarrier_thread_mutex);
46 pthread_mutex_unlock(&test_membarrier_thread_mutex);
47
48 test_membarrier_fail();
49
50 test_membarrier_success();
51
52 pthread_mutex_lock(&test_membarrier_thread_mutex);
53 thread_quit = 1;
54 pthread_cond_broadcast(&test_membarrier_thread_cond);
55 pthread_mutex_unlock(&test_membarrier_thread_mutex);
56
57 pthread_join(test_thread, NULL);
58
59 return 0;
60}
61
62int main(int argc, char **argv)
63{
64 ksft_print_header();
65 ksft_set_plan(13);
66
67 test_membarrier_query();
68
69 /* Multi-threaded */
70 test_mt_membarrier();
71
72 return ksft_exit_pass();
73}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
new file mode 100644
index 000000000000..c1c963902854
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -0,0 +1,24 @@
1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <linux/membarrier.h>
4#include <syscall.h>
5#include <stdio.h>
6#include <errno.h>
7#include <string.h>
8#include <pthread.h>
9
10#include "membarrier_test_impl.h"
11
12int main(int argc, char **argv)
13{
14 ksft_print_header();
15 ksft_set_plan(13);
16
17 test_membarrier_query();
18
19 test_membarrier_fail();
20
21 test_membarrier_success();
22
23 return ksft_exit_pass();
24}