1 files changed, 199 insertions, 87 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f92cbd2f4450..8942af0813e3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,7 +14,7 @@ struct vm_area_struct;
 #define ___GFP_HIGHMEM          0x02u
 #define ___GFP_DMA32            0x04u
 #define ___GFP_MOVABLE          0x08u
-#define ___GFP_WAIT             0x10u
+#define ___GFP_RECLAIMABLE      0x10u
 #define ___GFP_HIGH             0x20u
 #define ___GFP_IO               0x40u
 #define ___GFP_FS               0x80u
@@ -29,18 +29,17 @@ struct vm_area_struct;
 #define ___GFP_NOMEMALLOC       0x10000u
 #define ___GFP_HARDWALL         0x20000u
 #define ___GFP_THISNODE         0x40000u
-#define ___GFP_RECLAIMABLE      0x80000u
+#define ___GFP_ATOMIC           0x80000u
 #define ___GFP_NOACCOUNT        0x100000u
 #define ___GFP_NOTRACK          0x200000u
-#define ___GFP_NO_KSWAPD        0x400000u
+#define ___GFP_DIRECT_RECLAIM   0x400000u
 #define ___GFP_OTHER_NODE       0x800000u
 #define ___GFP_WRITE            0x1000000u
+#define ___GFP_KSWAPD_RECLAIM   0x2000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 /*
- * GFP bitmasks..
+ * Physical address zone modifiers (see linux/mmzone.h - low four bits)
- *
- * Zone modifiers (see linux/mmzone.h - low three bits)
 *
 * Do not put any conditional on these. If necessary modify the definitions
 * without the underscores and use them consistently. The definitions here may
@@ -50,116 +49,229 @@ struct vm_area_struct;
 #define __GFP_HIGHMEM   ((__force gfp_t)___GFP_HIGHMEM)
 #define __GFP_DMA32     ((__force gfp_t)___GFP_DMA32)
 #define __GFP_MOVABLE   ((__force gfp_t)___GFP_MOVABLE)  /* Page is movable */
+#define __GFP_MOVABLE   ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
 #define GFP_ZONEMASK    (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+/*
+ * Page mobility and placement hints
+ *
+ * These flags provide hints about how mobile the page is. Pages with similar
+ * mobility are placed within the same pageblocks to minimise problems due
+ * to external fragmentation.
+ *
+ * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ *   moved by page migration during memory compaction or can be reclaimed.
+ *
+ * __GFP_RECLAIMABLE is used for slab allocations that specify
+ *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ *
+ * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ *   these pages will be spread between local zones to avoid all the dirty
+ *   pages being in one zone (fair zone allocation policy).
+ *
+ * __GFP_HARDWALL enforces the cpuset memory allocation policy.
+ *
+ * __GFP_THISNODE forces the allocation to be satisfied from the requested
+ *   node with no fallbacks or placement policy enforcements.
+ */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
+#define __GFP_WRITE     ((__force gfp_t)___GFP_WRITE)
+#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL)
+#define __GFP_THISNODE  ((__force gfp_t)___GFP_THISNODE)
 /*
- * Action modifiers - doesn't change the zoning
+ * Watermark modifiers -- controls access to emergency reserves
+ *
+ * __GFP_HIGH indicates that the caller is high-priority and that granting
+ *   the request is necessary before the system can make forward progress.
+ *   For example, creating an IO context to clean pages.
+ *
+ * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
+ *   high priority. Users are typically interrupt handlers. This may be
+ *   used in conjunction with __GFP_HIGH
+ *
+ * __GFP_MEMALLOC allows access to all memory. This should only be used when
+ *   the caller guarantees the allocation will allow more memory to be freed
+ *   very shortly e.g. process exiting or swapping. Users either should
+ *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ *
+ * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
+ *
+ * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
+ */
+#define __GFP_ATOMIC    ((__force gfp_t)___GFP_ATOMIC)
+#define __GFP_HIGH      ((__force gfp_t)___GFP_HIGH)
+#define __GFP_MEMALLOC  ((__force gfp_t)___GFP_MEMALLOC)
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
+#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT)
+/*
+ * Reclaim modifiers
+ *
+ * __GFP_IO can start physical IO.
+ *
+ * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ *   allocator recursing into the filesystem which might already be holding
+ *   locks.
+ *
+ * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ *   This flag can be cleared to avoid unnecessary delays when a fallback
+ *   option is available.
+ *
+ * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ *   the low watermark is reached and have it reclaim pages until the high
+ *   watermark is reached. A caller may wish to clear this flag when fallback
+ *   options are available and the reclaim is likely to disrupt the system. The
+ *   canonical example is THP allocation where a fallback is cheap but
+ *   reclaim/compaction may cause indirect stalls.
+ *
+ * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
 *
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- * _might_ fail.  This depends upon the particular VM implementation.
+ *   _might_ fail.  This depends upon the particular VM implementation.
 *
 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- * cannot handle allocation failures. New users should be evaluated carefully
+ *   cannot handle allocation failures. New users should be evaluated carefully
- * (and the flag should be used only when there is no reasonable failure policy)
+ *   (and the flag should be used only when there is no reasonable failure
- * but it is definitely preferable to use the flag rather than opencode endless
+ *   policy) but it is definitely preferable to use the flag rather than
- * loop around allocator.
+ *   opencode endless loop around allocator.
 *
 * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- * return NULL when direct reclaim and memory compaction have failed to allow
+ *   return NULL when direct reclaim and memory compaction have failed to allow
- * the allocation to succeed.  The OOM killer is not called with the current
+ *   the allocation to succeed.  The OOM killer is not called with the current
- * implementation.
+ *   implementation.
- *
- * __GFP_MOVABLE: Flag that this page will be movable by the page migration
- * mechanism or reclaimed
 */
-#define __GFP_WAIT      ((__force gfp_t)___GFP_WAIT)    /* Can wait and reschedule? */
+#define __GFP_IO        ((__force gfp_t)___GFP_IO)
-#define __GFP_HIGH      ((__force gfp_t)___GFP_HIGH)    /* Should access emergency pools? */
+#define __GFP_FS        ((__force gfp_t)___GFP_FS)
-#define __GFP_IO        ((__force gfp_t)___GFP_IO)      /* Can start physical IO? */
+#define __GFP_DIRECT_RECLAIM    ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
-#define __GFP_FS        ((__force gfp_t)___GFP_FS)      /* Can call down to low-level FS? */
+#define __GFP_KSWAPD_RECLAIM    ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
-#define __GFP_COLD      ((__force gfp_t)___GFP_COLD)    /* Cache-cold page required */
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_NOWARN    ((__force gfp_t)___GFP_NOWARN)  /* Suppress page allocation failure warning */
+#define __GFP_REPEAT    ((__force gfp_t)___GFP_REPEAT)
-#define __GFP_REPEAT    ((__force gfp_t)___GFP_REPEAT)  /* See above */
+#define __GFP_NOFAIL    ((__force gfp_t)___GFP_NOFAIL)
-#define __GFP_NOFAIL    ((__force gfp_t)___GFP_NOFAIL)  /* See above */
+#define __GFP_NORETRY   ((__force gfp_t)___GFP_NORETRY)
-#define __GFP_NORETRY   ((__force gfp_t)___GFP_NORETRY) /* See above */
-#define __GFP_MEMALLOC  ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
-#define __GFP_COMP      ((__force gfp_t)___GFP_COMP)    /* Add compound page metadata */
-#define __GFP_ZERO      ((__force gfp_t)___GFP_ZERO)    /* Return zeroed page on success */
-#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
-                                                         * This takes precedence over the
-                                                         * __GFP_MEMALLOC flag if both are
-                                                         * set
-                                                         */
-#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
-#define __GFP_THISNODE  ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
-#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
-#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
-#define __GFP_NOTRACK   ((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
-#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
-#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_WRITE     ((__force gfp_t)___GFP_WRITE)   /* Allocator intends to dirty page */
 /*
- * This may seem redundant, but it's a way of annotating false positives vs.
+ * Action modifiers
- * allocations that simply cannot be supported (e.g. page tables).
+ *
+ * __GFP_COLD indicates that the caller does not expect the page to be used in
+ *   the near future. Where possible, a cache-cold page will be returned.
+ *
+ * __GFP_NOWARN suppresses allocation failure reports.
+ *
+ * __GFP_COMP adds compound page metadata.
+ *
+ * __GFP_ZERO returns a zeroed page on success.
+ *
+ * __GFP_NOTRACK avoids tracking with kmemcheck.
+ *
+ * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
+ *   distinguishing in the source between false positives and allocations that
+ *   cannot be supported (e.g. page tables).
+ *
+ * __GFP_OTHER_NODE is for allocations that are on a remote node but that
+ *   should not be accounted for as a remote allocation in vmstat. A
+ *   typical user would be khugepaged collapsing a huge page on a remote
+ *   node.
 */
+#define __GFP_COLD      ((__force gfp_t)___GFP_COLD)
+#define __GFP_NOWARN    ((__force gfp_t)___GFP_NOWARN)
+#define __GFP_COMP      ((__force gfp_t)___GFP_COMP)
+#define __GFP_ZERO      ((__force gfp_t)___GFP_ZERO)
+#define __GFP_NOTRACK   ((__force gfp_t)___GFP_NOTRACK)
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
+#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
-#define __GFP_BITS_SHIFT 25     /* Room for N __GFP_FOO bits */
+/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 26
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
-/* This equals 0, but use constants in case they ever change */
+/*
-#define GFP_NOWAIT      (GFP_ATOMIC & ~__GFP_HIGH)
+ * Useful GFP flag combinations that are commonly used. It is recommended
-/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
+ * that subsystems start with one of these combinations and then set/clear
-#define GFP_ATOMIC      (__GFP_HIGH)
+ * __GFP_FOO flags as necessary.
-#define GFP_NOIO        (__GFP_WAIT)
+ *
-#define GFP_NOFS        (__GFP_WAIT | __GFP_IO)
+ * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
-#define GFP_KERNEL      (__GFP_WAIT | __GFP_IO | __GFP_FS)
+ *   watermark is applied to allow access to "atomic reserves"
-#define GFP_TEMPORARY   (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ *
+ * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * GFP_NOWAIT is for kernel allocations that should not stall for direct
+ *   reclaim, start physical IO or use any filesystem callback.
+ *
+ * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ *   that do not require the starting of any physical IO.
+ *
+ * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ *
+ * GFP_USER is for userspace allocations that also need to be directly
+ *   accessible by the kernel or hardware. It is typically used by hardware
+ *   for buffers that are mapped to userspace (e.g. graphics) that hardware
+ *   still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * GFP_DMA exists for historical reasons and should be avoided where possible.
+ *   The flag indicates that the caller requires that the lowest zone be
+ *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ *   it would require careful auditing as some users really require it and
+ *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
+ *   lowest zone as a type of emergency reserve.
+ *
+ * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
+ *   address.
+ *
+ * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ *   do not need to be directly accessible by the kernel but that cannot
+ *   move once in use. An example may be a hardware allocation that maps
+ *   data directly into userspace but has no addressing limitations.
+ *
+ * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ *   need direct access to but can use kmap() when access is required. They
+ *   are expected to be movable via page reclaim or page migration. Typically,
+ *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
+ *
+ * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
+ *   that will fail quickly if memory is not available and will not wake
+ *   kswapd on failure.
+ */
+#define GFP_ATOMIC      (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL      (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT      (__GFP_KSWAPD_RECLAIM)
+#define GFP_NOIO        (__GFP_RECLAIM)
+#define GFP_NOFS        (__GFP_RECLAIM | __GFP_IO)
+#define GFP_TEMPORARY   (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
                         __GFP_RECLAIMABLE)
-#define GFP_USER        (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_USER        (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_DMA         __GFP_DMA
+#define GFP_DMA32       __GFP_DMA32
 #define GFP_HIGHUSER    (GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE    (GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_IOFS        (__GFP_IO | __GFP_FS)
+#define GFP_TRANSHUGE   ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-#define GFP_TRANSHUGE   (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+                         __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
-                         __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+                         ~__GFP_KSWAPD_RECLAIM)
-                         __GFP_NO_KSWAPD)
-/* This mask makes up all the page movable related flags */
+/* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
-/* Control page allocator reclaim behavior */
-#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-                        __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
-                        __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
-/* Control slab gfp mask during early boot */
-#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
-/* Control allocation constraints */
-#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
-/* Do not use these with a slab allocator */
-#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
-/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
-   platforms, used as appropriate on others */
-#define GFP_DMA         __GFP_DMA
-/* 4GB DMA on some platforms */
-#define GFP_DMA32       __GFP_DMA32
-/* Convert GFP flags to their corresponding migrate type */
 static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 {
-        WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+        VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+        BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
+        BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
        if (unlikely(page_group_by_mobility_disabled))
                return MIGRATE_UNMOVABLE;
        /* Group based on mobility */
-        return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
+        return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
-                ((gfp_flags & __GFP_RECLAIMABLE) != 0);
+}
+#undef GFP_MOVABLE_MASK
+#undef GFP_MOVABLE_SHIFT
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+        return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 #ifdef CONFIG_HIGHMEM

diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f92cbd2f4450..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h
@@ -14,7 +14,7 @@ struct vm_area_struct;
14	#define ___GFP_HIGHMEM 0x02u	14	#define ___GFP_HIGHMEM 0x02u
15	#define ___GFP_DMA32 0x04u	15	#define ___GFP_DMA32 0x04u
16	#define ___GFP_MOVABLE 0x08u	16	#define ___GFP_MOVABLE 0x08u
17	#define ___GFP_WAIT 0x10u	17	#define ___GFP_RECLAIMABLE 0x10u
18	#define ___GFP_HIGH 0x20u	18	#define ___GFP_HIGH 0x20u
19	#define ___GFP_IO 0x40u	19	#define ___GFP_IO 0x40u
20	#define ___GFP_FS 0x80u	20	#define ___GFP_FS 0x80u
@@ -29,18 +29,17 @@ struct vm_area_struct;
29	#define ___GFP_NOMEMALLOC 0x10000u	29	#define ___GFP_NOMEMALLOC 0x10000u
30	#define ___GFP_HARDWALL 0x20000u	30	#define ___GFP_HARDWALL 0x20000u
31	#define ___GFP_THISNODE 0x40000u	31	#define ___GFP_THISNODE 0x40000u
32	#define ___GFP_RECLAIMABLE 0x80000u	32	#define ___GFP_ATOMIC 0x80000u
33	#define ___GFP_NOACCOUNT 0x100000u	33	#define ___GFP_NOACCOUNT 0x100000u
34	#define ___GFP_NOTRACK 0x200000u	34	#define ___GFP_NOTRACK 0x200000u
35	#define ___GFP_NO_KSWAPD 0x400000u	35	#define ___GFP_DIRECT_RECLAIM 0x400000u
36	#define ___GFP_OTHER_NODE 0x800000u	36	#define ___GFP_OTHER_NODE 0x800000u
37	#define ___GFP_WRITE 0x1000000u	37	#define ___GFP_WRITE 0x1000000u
		38	#define ___GFP_KSWAPD_RECLAIM 0x2000000u
38	/* If the above are modified, __GFP_BITS_SHIFT may need updating */	39	/* If the above are modified, __GFP_BITS_SHIFT may need updating */
39		40
40	/*	41	/*
41	* GFP bitmasks..	42	* Physical address zone modifiers (see linux/mmzone.h - low four bits)
42	*
43	* Zone modifiers (see linux/mmzone.h - low three bits)
44	*	43	*
45	* Do not put any conditional on these. If necessary modify the definitions	44	* Do not put any conditional on these. If necessary modify the definitions
46	* without the underscores and use them consistently. The definitions here may	45	* without the underscores and use them consistently. The definitions here may
@@ -50,116 +49,229 @@ struct vm_area_struct;
50	#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)	49	#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
51	#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)	50	#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
52	#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */	51	#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */
		52	#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
53	#define GFP_ZONEMASK (__GFP_DMA\|__GFP_HIGHMEM\|__GFP_DMA32\|__GFP_MOVABLE)	53	#define GFP_ZONEMASK (__GFP_DMA\|__GFP_HIGHMEM\|__GFP_DMA32\|__GFP_MOVABLE)
		54
		55	/*
		56	* Page mobility and placement hints
		57	*
		58	* These flags provide hints about how mobile the page is. Pages with similar
		59	* mobility are placed within the same pageblocks to minimise problems due
		60	* to external fragmentation.
		61	*
		62	* __GFP_MOVABLE (also a zone modifier) indicates that the page can be
		63	* moved by page migration during memory compaction or can be reclaimed.
		64	*
		65	* __GFP_RECLAIMABLE is used for slab allocations that specify
		66	* SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
		67	*
		68	* __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
		69	* these pages will be spread between local zones to avoid all the dirty
		70	* pages being in one zone (fair zone allocation policy).
		71	*
		72	* __GFP_HARDWALL enforces the cpuset memory allocation policy.
		73	*
		74	* __GFP_THISNODE forces the allocation to be satisfied from the requested
		75	* node with no fallbacks or placement policy enforcements.
		76	*/
		77	#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
		78	#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
		79	#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
		80	#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
		81
54	/*	82	/*
55	* Action modifiers - doesn't change the zoning	83	* Watermark modifiers -- controls access to emergency reserves
		84	*
		85	* __GFP_HIGH indicates that the caller is high-priority and that granting
		86	* the request is necessary before the system can make forward progress.
		87	* For example, creating an IO context to clean pages.
		88	*
		89	* __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
		90	* high priority. Users are typically interrupt handlers. This may be
		91	* used in conjunction with __GFP_HIGH
		92	*
		93	* __GFP_MEMALLOC allows access to all memory. This should only be used when
		94	* the caller guarantees the allocation will allow more memory to be freed
		95	* very shortly e.g. process exiting or swapping. Users either should
		96	* be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
		97	*
		98	* __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
		99	* This takes precedence over the __GFP_MEMALLOC flag if both are set.
		100	*
		101	* __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
		102	*/
		103	#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC)
		104	#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
		105	#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
		106	#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
		107	#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT)
		108
		109	/*
		110	* Reclaim modifiers
		111	*
		112	* __GFP_IO can start physical IO.
		113	*
		114	* __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
		115	* allocator recursing into the filesystem which might already be holding
		116	* locks.
		117	*
		118	* __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
		119	* This flag can be cleared to avoid unnecessary delays when a fallback
		120	* option is available.
		121	*
		122	* __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
		123	* the low watermark is reached and have it reclaim pages until the high
		124	* watermark is reached. A caller may wish to clear this flag when fallback
		125	* options are available and the reclaim is likely to disrupt the system. The
		126	* canonical example is THP allocation where a fallback is cheap but
		127	* reclaim/compaction may cause indirect stalls.
		128	*
		129	* __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
56	*	130	*
57	* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt	131	* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
58	* _might_ fail. This depends upon the particular VM implementation.	132	* _might_ fail. This depends upon the particular VM implementation.
59	*	133	*
60	* __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller	134	* __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
61	* cannot handle allocation failures. New users should be evaluated carefully	135	* cannot handle allocation failures. New users should be evaluated carefully
62	* (and the flag should be used only when there is no reasonable failure policy)	136	* (and the flag should be used only when there is no reasonable failure
63	* but it is definitely preferable to use the flag rather than opencode endless	137	* policy) but it is definitely preferable to use the flag rather than
64	* loop around allocator.	138	* opencode endless loop around allocator.
65	*	139	*
66	* __GFP_NORETRY: The VM implementation must not retry indefinitely and will	140	* __GFP_NORETRY: The VM implementation must not retry indefinitely and will
67	* return NULL when direct reclaim and memory compaction have failed to allow	141	* return NULL when direct reclaim and memory compaction have failed to allow
68	* the allocation to succeed. The OOM killer is not called with the current	142	* the allocation to succeed. The OOM killer is not called with the current
69	* implementation.	143	* implementation.
70	*
71	* __GFP_MOVABLE: Flag that this page will be movable by the page migration
72	* mechanism or reclaimed
73	*/	144	*/
74	#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */	145	#define __GFP_IO ((__force gfp_t)___GFP_IO)
75	#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */	146	#define __GFP_FS ((__force gfp_t)___GFP_FS)
76	#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */	147	#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
77	#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */	148	#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
78	#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */	149	#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM\|___GFP_KSWAPD_RECLAIM))
79	#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */	150	#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT)
80	#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */	151	#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
81	#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */	152	#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
82	#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */
83	#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
84	#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */
85	#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */
86	#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
87	* This takes precedence over the
88	* __GFP_MEMALLOC flag if both are
89	* set
90	*/
91	#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
92	#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
93	#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
94	#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
95	#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
96
97	#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
98	#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
99	#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
100		153
101	/*	154	/*
102	* This may seem redundant, but it's a way of annotating false positives vs.	155	* Action modifiers
103	* allocations that simply cannot be supported (e.g. page tables).	156	*
		157	* __GFP_COLD indicates that the caller does not expect the page to be used in
		158	* the near future. Where possible, a cache-cold page will be returned.
		159	*
		160	* __GFP_NOWARN suppresses allocation failure reports.
		161	*
		162	* __GFP_COMP adds compound page metadata.
		163	*
		164	* __GFP_ZERO returns a zeroed page on success.
		165	*
		166	* __GFP_NOTRACK avoids tracking with kmemcheck.
		167	*
		168	* __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
		169	* distinguishing in the source between false positives and allocations that
		170	* cannot be supported (e.g. page tables).
		171	*
		172	* __GFP_OTHER_NODE is for allocations that are on a remote node but that
		173	* should not be accounted for as a remote allocation in vmstat. A
		174	* typical user would be khugepaged collapsing a huge page on a remote
		175	* node.
104	*/	176	*/
		177	#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
		178	#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
		179	#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
		180	#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
		181	#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
105	#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)	182	#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
		183	#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
106		184
107	#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */	185	/* Room for N __GFP_FOO bits */
		186	#define __GFP_BITS_SHIFT 26
108	#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))	187	#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
109		188
110	/* This equals 0, but use constants in case they ever change */	189	/*
111	#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)	190	* Useful GFP flag combinations that are commonly used. It is recommended
112	/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */	191	* that subsystems start with one of these combinations and then set/clear
113	#define GFP_ATOMIC (__GFP_HIGH)	192	* __GFP_FOO flags as necessary.
114	#define GFP_NOIO (__GFP_WAIT)	193	*
115	#define GFP_NOFS (__GFP_WAIT \| __GFP_IO)	194	* GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
116	#define GFP_KERNEL (__GFP_WAIT \| __GFP_IO \| __GFP_FS)	195	* watermark is applied to allow access to "atomic reserves"
117	#define GFP_TEMPORARY (__GFP_WAIT \| __GFP_IO \| __GFP_FS \| \	196	*
		197	* GFP_KERNEL is typical for kernel-internal allocations. The caller requires
		198	* ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
		199	*
		200	* GFP_NOWAIT is for kernel allocations that should not stall for direct
		201	* reclaim, start physical IO or use any filesystem callback.
		202	*
		203	* GFP_NOIO will use direct reclaim to discard clean pages or slab pages
		204	* that do not require the starting of any physical IO.
		205	*
		206	* GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
		207	*
		208	* GFP_USER is for userspace allocations that also need to be directly
		209	* accessible by the kernel or hardware. It is typically used by hardware
		210	* for buffers that are mapped to userspace (e.g. graphics) that hardware
		211	* still must DMA to. cpuset limits are enforced for these allocations.
		212	*
		213	* GFP_DMA exists for historical reasons and should be avoided where possible.
		214	* The flag indicates that the caller requires that the lowest zone be
		215	* used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
		216	* it would require careful auditing as some users really require it and
		217	* others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
		218	* lowest zone as a type of emergency reserve.
		219	*
		220	* GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
		221	* address.
		222	*
		223	* GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
		224	* do not need to be directly accessible by the kernel but that cannot
		225	* move once in use. An example may be a hardware allocation that maps
		226	* data directly into userspace but has no addressing limitations.
		227	*
		228	* GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
		229	* need direct access to but can use kmap() when access is required. They
		230	* are expected to be movable via page reclaim or page migration. Typically,
		231	* pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
		232	*
		233	* GFP_TRANSHUGE is used for THP allocations. They are compound allocations
		234	* that will fail quickly if memory is not available and will not wake
		235	* kswapd on failure.
		236	*/
		237	#define GFP_ATOMIC (__GFP_HIGH\|__GFP_ATOMIC\|__GFP_KSWAPD_RECLAIM)
		238	#define GFP_KERNEL (__GFP_RECLAIM \| __GFP_IO \| __GFP_FS)
		239	#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
		240	#define GFP_NOIO (__GFP_RECLAIM)
		241	#define GFP_NOFS (__GFP_RECLAIM \| __GFP_IO)
		242	#define GFP_TEMPORARY (__GFP_RECLAIM \| __GFP_IO \| __GFP_FS \| \
118	__GFP_RECLAIMABLE)	243	__GFP_RECLAIMABLE)
119	#define GFP_USER (__GFP_WAIT \| __GFP_IO \| __GFP_FS \| __GFP_HARDWALL)	244	#define GFP_USER (__GFP_RECLAIM \| __GFP_IO \| __GFP_FS \| __GFP_HARDWALL)
		245	#define GFP_DMA __GFP_DMA
		246	#define GFP_DMA32 __GFP_DMA32
120	#define GFP_HIGHUSER (GFP_USER \| __GFP_HIGHMEM)	247	#define GFP_HIGHUSER (GFP_USER \| __GFP_HIGHMEM)
121	#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER \| __GFP_MOVABLE)	248	#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER \| __GFP_MOVABLE)
122	#define GFP_IOFS (__GFP_IO \| __GFP_FS)	249	#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE \| __GFP_COMP \| \
123	#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE \| __GFP_COMP \| \	250	__GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN) & \
124	__GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN \| \	251	~__GFP_KSWAPD_RECLAIM)
125	__GFP_NO_KSWAPD)
126		252
127	/* This mask makes up all the page movable related flags */	253	/* Convert GFP flags to their corresponding migrate type */
128	#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE\|__GFP_MOVABLE)	254	#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE\|__GFP_MOVABLE)
		255	#define GFP_MOVABLE_SHIFT 3
129		256
130	/* Control page allocator reclaim behavior */
131	#define GFP_RECLAIM_MASK (__GFP_WAIT\|__GFP_HIGH\|__GFP_IO\|__GFP_FS\|\
132	__GFP_NOWARN\|__GFP_REPEAT\|__GFP_NOFAIL\|\
133	__GFP_NORETRY\|__GFP_MEMALLOC\|__GFP_NOMEMALLOC)
134
135	/* Control slab gfp mask during early boot */
136	#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT\|__GFP_IO\|__GFP_FS))
137
138	/* Control allocation constraints */
139	#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL\|__GFP_THISNODE)
140
141	/* Do not use these with a slab allocator */
142	#define GFP_SLAB_BUG_MASK (__GFP_DMA32\|__GFP_HIGHMEM\|~__GFP_BITS_MASK)
143
144	/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
145	platforms, used as appropriate on others */
146
147	#define GFP_DMA __GFP_DMA
148
149	/* 4GB DMA on some platforms */
150	#define GFP_DMA32 __GFP_DMA32
151
152	/* Convert GFP flags to their corresponding migrate type */
153	static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)	257	static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
154	{	258	{
155	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);	259	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
		260	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
		261	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
156		262
157	if (unlikely(page_group_by_mobility_disabled))	263	if (unlikely(page_group_by_mobility_disabled))
158	return MIGRATE_UNMOVABLE;	264	return MIGRATE_UNMOVABLE;
159		265
160	/* Group based on mobility */	266	/* Group based on mobility */
161	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) \|	267	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
162	((gfp_flags & __GFP_RECLAIMABLE) != 0);	268	}
		269	#undef GFP_MOVABLE_MASK
		270	#undef GFP_MOVABLE_SHIFT
		271
		272	static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
		273	{
		274	return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
163	}	275	}
164		276
165	#ifdef CONFIG_HIGHMEM	277	#ifdef CONFIG_HIGHMEM