aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/kmem.h2
-rw-r--r--include/linux/gfp.h8
-rw-r--r--include/linux/sched.h8
-rw-r--r--include/linux/sched/mm.h26
-rw-r--r--kernel/locking/lockdep.c6
-rw-r--r--mm/page_alloc.c10
-rw-r--r--mm/vmscan.c6
7 files changed, 47 insertions, 19 deletions
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index a6c8da40c70d..d6ea520162b2 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -50,7 +50,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
50 lflags = GFP_ATOMIC | __GFP_NOWARN; 50 lflags = GFP_ATOMIC | __GFP_NOWARN;
51 } else { 51 } else {
52 lflags = GFP_KERNEL | __GFP_NOWARN; 52 lflags = GFP_KERNEL | __GFP_NOWARN;
53 if ((current->flags & PF_MEMALLOC_NOFS) || (flags & KM_NOFS)) 53 if (flags & KM_NOFS)
54 lflags &= ~__GFP_FS; 54 lflags &= ~__GFP_FS;
55 } 55 }
56 56
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 978232a3b4ae..2bfcfd33e476 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -210,8 +210,16 @@ struct vm_area_struct;
210 * 210 *
211 * GFP_NOIO will use direct reclaim to discard clean pages or slab pages 211 * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
212 * that do not require the starting of any physical IO. 212 * that do not require the starting of any physical IO.
213 * Please try to avoid using this flag directly and instead use
214 * memalloc_noio_{save,restore} to mark the whole scope which cannot
215 * perform any IO with a short explanation why. All allocation requests
216 * will inherit GFP_NOIO implicitly.
213 * 217 *
214 * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. 218 * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
219 * Please try to avoid using this flag directly and instead use
220 * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
221 * recurse into the FS layer with a short explanation why. All allocation
222 * requests will inherit GFP_NOFS implicitly.
215 * 223 *
216 * GFP_USER is for userspace allocations that also need to be directly 224 * GFP_USER is for userspace allocations that also need to be directly
 217 * accessible by the kernel or hardware. It is typically used by hardware 225 * accessible by the kernel or hardware. It is typically used by hardware
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8ac11465ac5b..993e7e25a3a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1224,9 +1224,9 @@ extern struct pid *cad_pid;
1224#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ 1224#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */
1225#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ 1225#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
1226#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ 1226#define PF_FROZEN 0x00010000 /* Frozen for system suspend */
1227#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ 1227#define PF_KSWAPD 0x00020000 /* I am kswapd */
1228#define PF_KSWAPD 0x00040000 /* I am kswapd */ 1228#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
1229#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ 1229#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
1230#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 1230#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1231#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 1231#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1232#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ 1232#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
@@ -1237,8 +1237,6 @@ extern struct pid *cad_pid;
1237#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ 1237#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
1238#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ 1238#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
1239 1239
1240#define PF_MEMALLOC_NOFS PF_FSTRANS /* Transition to a more generic GFP_NOFS scope semantic */
1241
1242/* 1240/*
1243 * Only the _current_ task can read/write to tsk->flags, but other 1241 * Only the _current_ task can read/write to tsk->flags, but other
1244 * tasks can access tsk->flags in readonly mode for example 1242 * tasks can access tsk->flags in readonly mode for example
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 830953ebb391..9daabe138c99 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -149,13 +149,21 @@ static inline bool in_vfork(struct task_struct *tsk)
149 return ret; 149 return ret;
150} 150}
151 151
152/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags 152/*
153 * __GFP_FS is also cleared as it implies __GFP_IO. 153 * Applies per-task gfp context to the given allocation flags.
154 * PF_MEMALLOC_NOIO implies GFP_NOIO
155 * PF_MEMALLOC_NOFS implies GFP_NOFS
154 */ 156 */
155static inline gfp_t memalloc_noio_flags(gfp_t flags) 157static inline gfp_t current_gfp_context(gfp_t flags)
156{ 158{
159 /*
160 * NOIO implies both NOIO and NOFS and it is a weaker context
 161 * so always make sure it takes precedence
162 */
157 if (unlikely(current->flags & PF_MEMALLOC_NOIO)) 163 if (unlikely(current->flags & PF_MEMALLOC_NOIO))
158 flags &= ~(__GFP_IO | __GFP_FS); 164 flags &= ~(__GFP_IO | __GFP_FS);
165 else if (unlikely(current->flags & PF_MEMALLOC_NOFS))
166 flags &= ~__GFP_FS;
159 return flags; 167 return flags;
160} 168}
161 169
@@ -171,4 +179,16 @@ static inline void memalloc_noio_restore(unsigned int flags)
171 current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; 179 current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
172} 180}
173 181
182static inline unsigned int memalloc_nofs_save(void)
183{
184 unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
185 current->flags |= PF_MEMALLOC_NOFS;
186 return flags;
187}
188
189static inline void memalloc_nofs_restore(unsigned int flags)
190{
191 current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
192}
193
174#endif /* _LINUX_SCHED_MM_H */ 194#endif /* _LINUX_SCHED_MM_H */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index f84294c9a018..fd440b5a3c75 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2877,7 +2877,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
2877 if (unlikely(!debug_locks)) 2877 if (unlikely(!debug_locks))
2878 return; 2878 return;
2879 2879
2880 gfp_mask = memalloc_noio_flags(gfp_mask); 2880 gfp_mask = current_gfp_context(gfp_mask);
2881 2881
2882 /* no reclaim without waiting on it */ 2882 /* no reclaim without waiting on it */
2883 if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) 2883 if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -2888,7 +2888,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
2888 return; 2888 return;
2889 2889
 2890 /* We're only interested in __GFP_FS allocations for now */ 2890 /* We're only interested in __GFP_FS allocations for now */
2891 if (!(gfp_mask & __GFP_FS)) 2891 if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
2892 return; 2892 return;
2893 2893
2894 /* 2894 /*
@@ -3954,7 +3954,7 @@ EXPORT_SYMBOL_GPL(lock_unpin_lock);
3954 3954
3955void lockdep_set_current_reclaim_state(gfp_t gfp_mask) 3955void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
3956{ 3956{
3957 current->lockdep_reclaim_gfp = memalloc_noio_flags(gfp_mask); 3957 current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
3958} 3958}
3959 3959
3960void lockdep_clear_current_reclaim_state(void) 3960void lockdep_clear_current_reclaim_state(void)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 34ac32428de8..7a3751e53f91 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3951,10 +3951,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
3951 goto out; 3951 goto out;
3952 3952
3953 /* 3953 /*
3954 * Runtime PM, block IO and its error handling path can deadlock 3954 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
3955 * because I/O on the device might not complete. 3955 * resp. GFP_NOIO which has to be inherited for all allocation requests
3956 * from a particular context which has been marked by
3957 * memalloc_no{fs,io}_{save,restore}.
3956 */ 3958 */
3957 alloc_mask = memalloc_noio_flags(gfp_mask); 3959 alloc_mask = current_gfp_context(gfp_mask);
3958 ac.spread_dirty_pages = false; 3960 ac.spread_dirty_pages = false;
3959 3961
3960 /* 3962 /*
@@ -7408,7 +7410,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
7408 .zone = page_zone(pfn_to_page(start)), 7410 .zone = page_zone(pfn_to_page(start)),
7409 .mode = MIGRATE_SYNC, 7411 .mode = MIGRATE_SYNC,
7410 .ignore_skip_hint = true, 7412 .ignore_skip_hint = true,
7411 .gfp_mask = memalloc_noio_flags(gfp_mask), 7413 .gfp_mask = current_gfp_context(gfp_mask),
7412 }; 7414 };
7413 INIT_LIST_HEAD(&cc.migratepages); 7415 INIT_LIST_HEAD(&cc.migratepages);
7414 7416
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ec4555369e17..3ad66580b8b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2915,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2915 unsigned long nr_reclaimed; 2915 unsigned long nr_reclaimed;
2916 struct scan_control sc = { 2916 struct scan_control sc = {
2917 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2917 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2918 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 2918 .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)),
2919 .reclaim_idx = gfp_zone(gfp_mask), 2919 .reclaim_idx = gfp_zone(gfp_mask),
2920 .order = order, 2920 .order = order,
2921 .nodemask = nodemask, 2921 .nodemask = nodemask,
@@ -2995,7 +2995,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2995 int nid; 2995 int nid;
2996 struct scan_control sc = { 2996 struct scan_control sc = {
2997 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), 2997 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
2998 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2998 .gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
2999 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2999 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
3000 .reclaim_idx = MAX_NR_ZONES - 1, 3000 .reclaim_idx = MAX_NR_ZONES - 1,
3001 .target_mem_cgroup = memcg, 3001 .target_mem_cgroup = memcg,
@@ -3702,7 +3702,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3702 int classzone_idx = gfp_zone(gfp_mask); 3702 int classzone_idx = gfp_zone(gfp_mask);
3703 struct scan_control sc = { 3703 struct scan_control sc = {
3704 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), 3704 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
3705 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 3705 .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)),
3706 .order = order, 3706 .order = order,
3707 .priority = NODE_RECLAIM_PRIORITY, 3707 .priority = NODE_RECLAIM_PRIORITY,
3708 .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE), 3708 .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),