author	Michal Hocko <mhocko@suse.com>	2016-07-26 18:24:53 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-26 19:19:19 -0400
commit	8a5c743e308dd2b90ad10d1faaa7a1b09173a132 (patch)
tree	ba679ecefc77b4a80bd35b00e55ddc1457b14e07
parent	e5e3f4c4f0e95ecbad2f8d2f4f6a29bb8a90226b (diff)
mm, memcg: use consistent gfp flags during readahead
Vladimir has noticed that we might declare memcg OOM even during
readahead, because read_pages uses only GFP_KERNEL (with the
mapping_gfp restriction) while __do_page_cache_readahead uses
page_cache_alloc_readahead, which adds __GFP_NORETRY to prevent OOMs.
This gfp mask discrepancy is unfortunate and easily fixable. Drop
page_cache_alloc_readahead(), which has only one user, move the
gfp_mask logic into readahead_gfp_mask, and propagate this mask from
__do_page_cache_readahead down to read_pages.
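
For illustration, the mismatch boils down to the two call sites below,
condensed from the hunks that follow (a sketch, not verbatim kernel code):

	/* Allocation in __do_page_cache_readahead(): no-retry, no OOM. */
	page = page_cache_alloc_readahead(mapping);
		/* ~ __page_cache_alloc(mapping_gfp_mask(mapping) |
		 *	__GFP_COLD | __GFP_NORETRY | __GFP_NOWARN) */

	/* Insertion in read_pages(): plain GFP_KERNEL, may declare OOM. */
	add_to_page_cache_lru(page, mapping, page->index,
			      mapping_gfp_constraint(mapping, GFP_KERNEL));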
On its own this would have only limited impact, because most
filesystems implement ->readpages, and the common implementation,
mpage_readpages, again uses GFP_KERNEL (with the mapping_gfp
restriction). We can tell it to use readahead_gfp_mask instead, as this
function is called only during readahead as well. The same applies to
read_cache_pages.
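
Concretely, mapping_gfp_constraint() only narrows the mapping's mask,
while readahead_gfp_mask() widens it with the fail-quietly bits
(condensed from the include/linux/pagemap.h hunk below):

	gfp_t old = mapping_gfp_constraint(mapping, GFP_KERNEL);
		/* mapping_gfp_mask(mapping) & GFP_KERNEL: a failed
		 * allocation may invoke the OOM killer */
	gfp_t new = readahead_gfp_mask(mapping);
		/* mapping_gfp_mask(mapping) | __GFP_COLD |
		 * __GFP_NORETRY | __GFP_NOWARN: gives up quietly */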
ext4 has its own ext4_mpage_readpages, but the path where pages !=
NULL can use the same gfp mask. Btrfs, cifs, f2fs and orangefs follow
a pattern very similar to mpage_readpages, so the same change can be
applied to them as well.
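
All of these call sites share the same shape, so the conversion is
mechanical; schematically, the common ->readpages loop looks like this
(a hypothetical composite, not any one filesystem verbatim):

	gfp_t gfp = readahead_gfp_mask(mapping);	/* was GFP_KERNEL-based */

	while (!list_empty(pages)) {
		struct page *page = lru_to_page(pages);

		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index, gfp))
			continue;	/* readahead is best effort: skip */
		/* ... map blocks and submit the read for this page ... */
	}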
[akpm@linux-foundation.org: coding-style fixes]
[mhocko@suse.com: restrict gfp mask in mpage_alloc]
Link: http://lkml.kernel.org/r/20160610074223.GC32285@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/1465301556-26431-1-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Chris Mason <clm@fb.com>
Cc: Steve French <sfrench@samba.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
Cc: Mike Marshall <hubcap@omnibond.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Changman Lee <cm224.lee@samsung.com>
Cc: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	fs/btrfs/extent_io.c	3
-rw-r--r--	fs/cifs/file.c	2
-rw-r--r--	fs/ext4/readpage.c	2
-rw-r--r--	fs/f2fs/data.c	3
-rw-r--r--	fs/mpage.c	4
-rw-r--r--	fs/orangefs/inode.c	2
-rw-r--r--	include/linux/pagemap.h	6
-rw-r--r--	mm/readahead.c	13
8 files changed, 19 insertions, 16 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 75533adef998..e91d55837dd2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4180,7 +4180,8 @@ int extent_readpages(struct extent_io_tree *tree,
 		prefetchw(&page->flags);
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping,
-					page->index, GFP_NOFS)) {
+					page->index,
+					readahead_gfp_mask(mapping))) {
 			put_page(page);
 			continue;
 		}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d4890b6dc22d..579e41b350a2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3366,7 +3366,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
 	struct page *page, *tpage;
 	unsigned int expected_index;
 	int rc;
-	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+	gfp_t gfp = readahead_gfp_mask(mapping);
 
 	INIT_LIST_HEAD(tmplist);
 
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index dc54a4b60eba..c75b66a64982 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
 			page = list_entry(pages->prev, struct page, lru);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping, page->index,
-				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
+				  readahead_gfp_mask(mapping)))
 				goto next_page;
 		}
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9a8bbc1fb1fa..c80dda4bdff8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -996,7 +996,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
 			page = list_entry(pages->prev, struct page, lru);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping,
-						  page->index, GFP_KERNEL))
+						  page->index,
+						  readahead_gfp_mask(mapping)))
 				goto next_page;
 		}
 
diff --git a/fs/mpage.c b/fs/mpage.c
index eedc644b78d7..c8a05901a37b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -71,6 +71,8 @@ mpage_alloc(struct block_device *bdev,
 {
 	struct bio *bio;
 
+	/* Restrict the given (page cache) mask for slab allocations */
+	gfp_flags &= GFP_KERNEL;
 	bio = bio_alloc(gfp_flags, nr_vecs);
 
 	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
@@ -362,7 +364,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	sector_t last_block_in_bio = 0;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
-	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+	gfp_t gfp = readahead_gfp_mask(mapping);
 
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
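
A note on the mpage_alloc hunk above: bio_alloc() takes its bio from
slab, so the page cache mask (commonly GFP_HIGHUSER_MOVABLE for regular
files) must be restricted before use. Assuming the 4.8-era flag
definitions, the AND works out as:

	/* GFP_KERNEL == __GFP_RECLAIM | __GFP_IO | __GFP_FS, so the AND
	 * keeps only those bits: it drops __GFP_HIGHMEM/__GFP_MOVABLE,
	 * which are invalid for slab allocations, and also the
	 * readahead-only __GFP_NORETRY/__GFP_NOWARN hints. */
	gfp_flags &= GFP_KERNEL;
	bio = bio_alloc(gfp_flags, nr_vecs);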
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 85640e955cde..06a8da75651d 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -80,7 +80,7 @@ static int orangefs_readpages(struct file *file,
 		if (!add_to_page_cache(page,
 				       mapping,
 				       page->index,
-				       GFP_KERNEL)) {
+				       readahead_gfp_mask(mapping))) {
 			ret = read_one_page(page);
 			gossip_debug(GOSSIP_INODE_DEBUG,
 				     "failure adding page to cache, read_one_page returned: %d\n",
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 97354102794d..81363b834900 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -209,10 +209,10 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x)
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
-static inline struct page *page_cache_alloc_readahead(struct address_space *x)
+static inline gfp_t readahead_gfp_mask(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x) |
-				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
+	return mapping_gfp_mask(x) |
+				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN;
 }
 
 typedef int filler_t(void *, struct page *);
diff --git a/mm/readahead.c b/mm/readahead.c
index 40be3ae0afe3..65ec288dc057 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -89,7 +89,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 		page = lru_to_page(pages);
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping, page->index,
-				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+				readahead_gfp_mask(mapping))) {
 			read_cache_pages_invalidate_page(mapping, page);
 			continue;
 		}
@@ -108,7 +108,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 EXPORT_SYMBOL(read_cache_pages);
 
 static int read_pages(struct address_space *mapping, struct file *filp,
-		struct list_head *pages, unsigned nr_pages)
+		struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
 {
 	struct blk_plug plug;
 	unsigned page_idx;
@@ -126,10 +126,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = lru_to_page(pages);
 		list_del(&page->lru);
-		if (!add_to_page_cache_lru(page, mapping, page->index,
-				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
 			mapping->a_ops->readpage(filp, page);
-		}
 		put_page(page);
 	}
 	ret = 0;
@@ -159,6 +157,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	int page_idx;
 	int ret = 0;
 	loff_t isize = i_size_read(inode);
+	gfp_t gfp_mask = readahead_gfp_mask(mapping);
 
 	if (isize == 0)
 		goto out;
@@ -180,7 +179,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		if (page && !radix_tree_exceptional_entry(page))
 			continue;
 
-		page = page_cache_alloc_readahead(mapping);
+		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			break;
 		page->index = page_offset;
@@ -196,7 +195,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	 * will then handle the error.
 	 */
 	if (ret)
-		read_pages(mapping, filp, &page_pool, ret);
+		read_pages(mapping, filp, &page_pool, ret, gfp_mask);
 	BUG_ON(!list_empty(&page_pool));
 out:
 	return ret;