author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-10 13:51:16 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-10 14:03:05 -0500
commit	caf491916b1c1e939a2c7575efb7a77f11fc9bdf (patch)
tree	f6e62f0a4427c2ba860ad8cd40a31e50be29de15
parent	31f8d42d44b48ba72b586ca03e810cbbd21ea16b (diff)
Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage
This reverts commits a50915394f1fc02c2861d3b7ce7014788aa5066e and
d7c3b937bdf45f0b844400b7bf6fd3ed50bac604.

This is a revert of a revert of a revert. In addition, it reverts the
even older i915 change to stop using the __GFP_NO_KSWAPD flag due to
the original commits in linux-next.

It turns out that the original patch really was bogus, and that the
original revert was the correct thing to do after all. We thought we
had fixed the problem, and then reverted the revert, but the problem
really is fundamental: waking up kswapd simply isn't the right thing
to do, and direct reclaim sometimes simply _is_ the right thing to do.
When certain allocations fail, we simply should try some direct
reclaim, and if that fails, fail the allocation. That's the right
thing to do for THP allocations, which can easily fail, and the GPU
allocations want to do that too.

So starting kswapd is sometimes simply wrong, and removing the flag
that said "don't start kswapd" was a mistake. Let's hope we never
revisit this mistake again - and certainly not this many times ;)

Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
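For readers unfamiliar with the flag, the allocation pattern the message argues for looks roughly like the sketch below. This is not part of the commit; the function name and the GFP_KERNEL base mask are illustrative, but the flag combination mirrors what the i915 and MTD callers in the diff do: first attempt the allocation opportunistically, without waking kswapd, retrying, or warning, and only fall back to a normal allocation (which may do direct reclaim) if that cheap attempt fails.

#include <linux/gfp.h>

/* Illustrative sketch only -- not part of this commit. */
static struct page *alloc_pages_opportunistic(unsigned int order)
{
	struct page *page;

	/* Cheap first try: no kswapd wakeup, no retries, no failure warning. */
	page = alloc_pages(GFP_KERNEL | __GFP_NO_KSWAPD |
			   __GFP_NORETRY | __GFP_NOWARN, order);
	if (page)
		return page;

	/* Fall back to a normal allocation that may enter direct reclaim. */
	return alloc_pages(GFP_KERNEL, order);
}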
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c	6
-rw-r--r--	drivers/mtd/mtdcore.c	6
-rw-r--r--	include/linux/gfp.h	13
-rw-r--r--	include/trace/events/gfpflags.h	1
-rw-r--r--	mm/page_alloc.c	7
5 files changed, 20 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 107f09befe92..9b285da4449b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1796,7 +1796,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 */
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	for_each_sg(st->sgl, sg, page_count, i) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
@@ -1809,7 +1809,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		 * our own buffer, now let the real VM do its job and
 		 * go down in flames if truly OOM.
 		 */
-		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
+		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
 		gfp |= __GFP_IO | __GFP_WAIT;
 
 		i915_gem_shrink_all(dev_priv);
@@ -1817,7 +1817,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		if (IS_ERR(page))
 			goto err_pages;
 
-		gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 		gfp &= ~(__GFP_IO | __GFP_WAIT);
 	}
 
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 374c46dff7dd..ec794a72975d 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1077,7 +1077,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  * until the request succeeds or until the allocation size falls below
  * the system page size. This attempts to make sure it does not adversely
  * impact system performance, so when allocating more than one page, we
- * ask the memory allocator to avoid re-trying.
+ * ask the memory allocator to avoid re-trying, swapping, writing back
+ * or performing I/O.
  *
  * Note, this function also makes sure that the allocated buffer is aligned to
  * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value.
@@ -1091,7 +1092,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  */
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
 {
-	gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY;
+	gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
+		       __GFP_NORETRY | __GFP_NO_KSWAPD;
 	size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
 	void *kbuf;
 
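As an aside, the doc comment in the first mtdcore.c hunk describes mtd_kmalloc_up_to()'s contract: the caller passes in the size it would like, the function returns a possibly smaller buffer, and *size is updated to what was actually allocated. A hypothetical caller (illustrative only, not from this patch; the function name and total_len parameter are made up) might look like this:

#include <linux/mtd/mtd.h>
#include <linux/slab.h>

/* Hypothetical caller of mtd_kmalloc_up_to() -- illustrative only. */
static int mtd_copy_example(struct mtd_info *mtd, size_t total_len)
{
	size_t chunk = total_len;	/* ask for the full length ... */
	void *buf = mtd_kmalloc_up_to(mtd, &chunk);	/* ... accept less */

	if (!buf)
		return -ENOMEM;		/* even the minimum allocation failed */

	/* ... perform the transfer in pieces of at most 'chunk' bytes ... */

	kfree(buf);
	return 0;
}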
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76e1aa206f57..d0a79678f169 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,9 +30,10 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_NOTRACK		0x100000u
-#define ___GFP_OTHER_NODE	0x200000u
-#define ___GFP_WRITE		0x400000u
+#define ___GFP_NOTRACK		0x200000u
+#define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 #define __GFP_RECLAIMABLE	((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
+#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
@@ -94,7 +96,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 23	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
@@ -114,7 +116,8 @@ struct vm_area_struct;
 			 __GFP_MOVABLE)
 #define GFP_IOFS	(__GFP_IO | __GFP_FS)
 #define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+			 __GFP_NO_KSWAPD)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 9391706e9254..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -36,6 +36,7 @@
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
+	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
 	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
 	) : "GFP_NOWAIT"
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8193809f3de0..7e208f0ad68c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2494,7 +2495,7 @@ rebalance:
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
+						(gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */