aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>2017-10-19 13:30:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-10-19 22:13:40 -0400
commita961e40917fb14614d368d8bc9782ca4d6a8cd11 (patch)
treebb2778144007d317d197b4f6cb5816ae91afb300
parent96f893abc87bd29339c973edf6068a064cb8a756 (diff)
membarrier: Provide register expedited private command
This introduces a "register private expedited" membarrier command which allows eventual removal of important memory barrier constraints on the scheduler fast-paths. It changes how the "private expedited" membarrier command (new to 4.14) is used from user-space. This new command allows processes to register their intent to use the private expedited command. This affects how the expedited private command introduced in 4.14-rc is meant to be used, and should be merged before 4.14 final. Processes are now required to register before using MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM. This fixes a problem that arose when designing requested extensions to sys_membarrier() to allow JITs to efficiently flush old code from instruction caches. Several potential algorithms are much less painful if the user registers intent to use this functionality early on, for example, before the process spawns the second thread. Registering at this time removes the need to interrupt each and every thread in that process at the first expedited sys_membarrier() system call. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/exec.c1
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--include/linux/sched/mm.h16
-rw-r--r--include/uapi/linux/membarrier.h23
-rw-r--r--kernel/sched/membarrier.c34
5 files changed, 66 insertions, 11 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 5470d3c1892a..3e14ba25f678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
1802 /* execve succeeded */ 1802 /* execve succeeded */
1803 current->fs->in_exec = 0; 1803 current->fs->in_exec = 0;
1804 current->in_execve = 0; 1804 current->in_execve = 0;
1805 membarrier_execve(current);
1805 acct_update_integrals(current); 1806 acct_update_integrals(current);
1806 task_numa_free(current); 1807 task_numa_free(current);
1807 free_bprm(bprm); 1808 free_bprm(bprm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5479a..1861ea8dba77 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,6 +445,9 @@ struct mm_struct {
445 unsigned long flags; /* Must use atomic bitops to access the bits */ 445 unsigned long flags; /* Must use atomic bitops to access the bits */
446 446
447 struct core_state *core_state; /* coredumping support */ 447 struct core_state *core_state; /* coredumping support */
448#ifdef CONFIG_MEMBARRIER
449 atomic_t membarrier_state;
450#endif
448#ifdef CONFIG_AIO 451#ifdef CONFIG_AIO
449 spinlock_t ioctx_lock; 452 spinlock_t ioctx_lock;
450 struct kioctx_table __rcu *ioctx_table; 453 struct kioctx_table __rcu *ioctx_table;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index ae53e413fb13..ab9bf7b73954 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
211 current->flags = (current->flags & ~PF_MEMALLOC) | flags; 211 current->flags = (current->flags & ~PF_MEMALLOC) | flags;
212} 212}
213 213
214#ifdef CONFIG_MEMBARRIER
215enum {
216 MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
217 MEMBARRIER_STATE_SWITCH_MM = (1U << 1),
218};
219
220static inline void membarrier_execve(struct task_struct *t)
221{
222 atomic_set(&t->mm->membarrier_state, 0);
223}
224#else
225static inline void membarrier_execve(struct task_struct *t)
226{
227}
228#endif
229
214#endif /* _LINUX_SCHED_MM_H */ 230#endif /* _LINUX_SCHED_MM_H */
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index 6d47b3249d8a..4e01ad7ffe98 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -52,21 +52,30 @@
52 * (non-running threads are de facto in such a 52 * (non-running threads are de facto in such a
53 * state). This only covers threads from the 53 * state). This only covers threads from the
54 * same processes as the caller thread. This 54 * same processes as the caller thread. This
55 * command returns 0. The "expedited" commands 55 * command returns 0 on success. The
56 * complete faster than the non-expedited ones, 56 * "expedited" commands complete faster than
57 * they never block, but have the downside of 57 * the non-expedited ones, they never block,
58 * causing extra overhead. 58 * but have the downside of causing extra
59 * overhead. A process needs to register its
60 * intent to use the private expedited command
61 * prior to using it, otherwise this command
62 * returns -EPERM.
63 * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
64 * Register the process intent to use
65 * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
66 * returns 0.
59 * 67 *
60 * Command to be passed to the membarrier system call. The commands need to 68 * Command to be passed to the membarrier system call. The commands need to
61 * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to 69 * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
62 * the value 0. 70 * the value 0.
63 */ 71 */
64enum membarrier_cmd { 72enum membarrier_cmd {
65 MEMBARRIER_CMD_QUERY = 0, 73 MEMBARRIER_CMD_QUERY = 0,
66 MEMBARRIER_CMD_SHARED = (1 << 0), 74 MEMBARRIER_CMD_SHARED = (1 << 0),
67 /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ 75 /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
68 /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ 76 /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
69 MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), 77 MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
78 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
70}; 79};
71 80
72#endif /* _UAPI_LINUX_MEMBARRIER_H */ 81#endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
18#include <linux/membarrier.h> 18#include <linux/membarrier.h>
19#include <linux/tick.h> 19#include <linux/tick.h>
20#include <linux/cpumask.h> 20#include <linux/cpumask.h>
21#include <linux/atomic.h>
21 22
22#include "sched.h" /* for cpu_rq(). */ 23#include "sched.h" /* for cpu_rq(). */
23 24
@@ -26,21 +27,26 @@
26 * except MEMBARRIER_CMD_QUERY. 27 * except MEMBARRIER_CMD_QUERY.
27 */ 28 */
28#define MEMBARRIER_CMD_BITMASK \ 29#define MEMBARRIER_CMD_BITMASK \
29 (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) 30 (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
31 | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
30 32
31static void ipi_mb(void *info) 33static void ipi_mb(void *info)
32{ 34{
33 smp_mb(); /* IPIs should be serializing but paranoid. */ 35 smp_mb(); /* IPIs should be serializing but paranoid. */
34} 36}
35 37
36static void membarrier_private_expedited(void) 38static int membarrier_private_expedited(void)
37{ 39{
38 int cpu; 40 int cpu;
39 bool fallback = false; 41 bool fallback = false;
40 cpumask_var_t tmpmask; 42 cpumask_var_t tmpmask;
41 43
44 if (!(atomic_read(&current->mm->membarrier_state)
45 & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
46 return -EPERM;
47
42 if (num_online_cpus() == 1) 48 if (num_online_cpus() == 1)
43 return; 49 return 0;
44 50
45 /* 51 /*
46 * Matches memory barriers around rq->curr modification in 52 * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
94 * rq->curr modification in scheduler. 100 * rq->curr modification in scheduler.
95 */ 101 */
96 smp_mb(); /* exit from system call is not a mb */ 102 smp_mb(); /* exit from system call is not a mb */
103 return 0;
104}
105
106static void membarrier_register_private_expedited(void)
107{
108 struct task_struct *p = current;
109 struct mm_struct *mm = p->mm;
110
111 /*
112 * We need to consider threads belonging to different thread
113 * groups, which use the same mm. (CLONE_VM but not
114 * CLONE_THREAD).
115 */
116 if (atomic_read(&mm->membarrier_state)
117 & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
118 return;
119 atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
120 &mm->membarrier_state);
97} 121}
98 122
99/** 123/**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
144 synchronize_sched(); 168 synchronize_sched();
145 return 0; 169 return 0;
146 case MEMBARRIER_CMD_PRIVATE_EXPEDITED: 170 case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
147 membarrier_private_expedited(); 171 return membarrier_private_expedited();
172 case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
173 membarrier_register_private_expedited();
148 return 0; 174 return 0;
149 default: 175 default:
150 return -EINVAL; 176 return -EINVAL;