Diffstat (limited to 'mm/filemap.c')
 mm/filemap.c | 221
 1 file changed, 167 insertions(+), 54 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 594d73fef8b4..923fc2ebd74a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
+#include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			*shadowp = p;
 	}
 	__radix_tree_replace(&mapping->page_tree, node, slot, page,
-			     workingset_update_node, mapping);
+			     workingset_lookup_update(mapping));
 	mapping->nrpages++;
 	return 0;
 }
@@ -162,9 +163,12 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
 		radix_tree_clear_tags(&mapping->page_tree, node, slot);
 		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-				     workingset_update_node, mapping);
+				     workingset_lookup_update(mapping));
 	}
 
+	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
+
 	if (shadow) {
 		mapping->nrexceptional += nr;
 		/*
@@ -178,17 +182,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
 	mapping->nrpages -= nr;
 }
 
-/*
- * Delete a page from the page cache and free it. Caller has to make
- * sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold the mapping's tree_lock.
- */
-void __delete_from_page_cache(struct page *page, void *shadow)
+static void unaccount_page_cache_page(struct address_space *mapping,
+				      struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	int nr = hpage_nr_pages(page);
+	int nr;
 
-	trace_mm_filemap_delete_from_page_cache(page);
 	/*
 	 * if we're uptodate, flush out into the cleancache, otherwise
 	 * invalidate any existing cleancache entries. We can't leave
@@ -224,15 +222,12 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 		}
 	}
 
-	page_cache_tree_delete(mapping, page, shadow);
-
-	page->mapping = NULL;
-	/* Leave page->index set: truncation lookup relies upon it */
-
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (PageHuge(page))
 		return;
 
+	nr = hpage_nr_pages(page);
+
 	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
@@ -243,17 +238,51 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	}
 
 	/*
-	 * At this point page must be either written or cleaned by truncate.
-	 * Dirty page here signals a bug and loss of unwritten data.
+	 * At this point page must be either written or cleaned by
+	 * truncate. Dirty page here signals a bug and loss of
+	 * unwritten data.
 	 *
-	 * This fixes dirty accounting after removing the page entirely but
-	 * leaves PageDirty set: it has no effect for truncated page and
-	 * anyway will be cleared before returning page into buddy allocator.
+	 * This fixes dirty accounting after removing the page entirely
+	 * but leaves PageDirty set: it has no effect for truncated
+	 * page and anyway will be cleared before returning page into
+	 * buddy allocator.
 	 */
 	if (WARN_ON_ONCE(PageDirty(page)))
 		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
 }
 
+/*
+ * Delete a page from the page cache and free it. Caller has to make
+ * sure the page is locked and that nobody else uses it - or that usage
+ * is safe. The caller must hold the mapping's tree_lock.
+ */
+void __delete_from_page_cache(struct page *page, void *shadow)
+{
+	struct address_space *mapping = page->mapping;
+
+	trace_mm_filemap_delete_from_page_cache(page);
+
+	unaccount_page_cache_page(mapping, page);
+	page_cache_tree_delete(mapping, page, shadow);
+}
+
+static void page_cache_free_page(struct address_space *mapping,
+				struct page *page)
+{
+	void (*freepage)(struct page *);
+
+	freepage = mapping->a_ops->freepage;
+	if (freepage)
+		freepage(page);
+
+	if (PageTransHuge(page) && !PageHuge(page)) {
+		page_ref_sub(page, HPAGE_PMD_NR);
+		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+	} else {
+		put_page(page);
+	}
+}
+
 /**
  * delete_from_page_cache - delete page from page cache
  * @page: the page which the kernel is trying to remove from page cache
@@ -266,27 +295,98 @@ void delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	unsigned long flags;
-	void (*freepage)(struct page *);
 
 	BUG_ON(!PageLocked(page));
-
-	freepage = mapping->a_ops->freepage;
-
 	spin_lock_irqsave(&mapping->tree_lock, flags);
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 
-	if (freepage)
-		freepage(page);
+	page_cache_free_page(mapping, page);
+}
+EXPORT_SYMBOL(delete_from_page_cache);
 
-	if (PageTransHuge(page) && !PageHuge(page)) {
-		page_ref_sub(page, HPAGE_PMD_NR);
-		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
-	} else {
-		put_page(page);
+/*
+ * page_cache_tree_delete_batch - delete several pages from page cache
+ * @mapping: the mapping to which pages belong
+ * @pvec: pagevec with pages to delete
+ *
+ * The function walks over mapping->page_tree and removes pages passed in @pvec
+ * from the radix tree. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * modified). The function expects only THP head pages to be present in the
+ * @pvec and takes care to delete all corresponding tail pages from the radix
+ * tree as well.
+ *
+ * The function expects mapping->tree_lock to be held.
+ */
+static void
+page_cache_tree_delete_batch(struct address_space *mapping,
+			     struct pagevec *pvec)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	int total_pages = 0;
+	int i = 0, tail_pages = 0;
+	struct page *page;
+	pgoff_t start;
+
+	start = pvec->pages[0]->index;
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+		if (i >= pagevec_count(pvec) && !tail_pages)
+			break;
+		page = radix_tree_deref_slot_protected(slot,
+						       &mapping->tree_lock);
+		if (radix_tree_exceptional_entry(page))
+			continue;
+		if (!tail_pages) {
+			/*
+			 * Some page got inserted in our range? Skip it. We
+			 * have our pages locked so they are protected from
+			 * being removed.
+			 */
+			if (page != pvec->pages[i])
+				continue;
+			WARN_ON_ONCE(!PageLocked(page));
+			if (PageTransHuge(page) && !PageHuge(page))
+				tail_pages = HPAGE_PMD_NR - 1;
+			page->mapping = NULL;
+			/*
+			 * Leave page->index set: truncation lookup relies
+			 * upon it
+			 */
+			i++;
+		} else {
+			tail_pages--;
+		}
+		radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
+		__radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+				workingset_lookup_update(mapping));
+		total_pages++;
 	}
+	mapping->nrpages -= total_pages;
+}
+
+void delete_from_page_cache_batch(struct address_space *mapping,
+				  struct pagevec *pvec)
+{
+	int i;
+	unsigned long flags;
+
+	if (!pagevec_count(pvec))
+		return;
+
+	spin_lock_irqsave(&mapping->tree_lock, flags);
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+
+		unaccount_page_cache_page(mapping, pvec->pages[i]);
+	}
+	page_cache_tree_delete_batch(mapping, pvec);
+	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+	for (i = 0; i < pagevec_count(pvec); i++)
+		page_cache_free_page(mapping, pvec->pages[i]);
 }
-EXPORT_SYMBOL(delete_from_page_cache);
 
 int filemap_check_errors(struct address_space *mapping)
 {
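
Note: delete_from_page_cache_batch() is only useful to callers that already hold every page in the pagevec locked, belonging to @mapping and sorted by index, so that the tree_lock round trip and the per-page accounting are amortized over the whole batch (the truncate path is the intended user of this interface). The sketch below shows a minimal, hypothetical caller; evict_locked_pages() and the way the pagevec is filled are illustrative only, and the prototype is assumed to be exported through linux/pagemap.h.

#include <linux/pagemap.h>
#include <linux/pagevec.h>

/*
 * Hypothetical helper: drop a batch of pages from @mapping in one go.
 * Assumes the caller passes at most PAGEVEC_SIZE pages that are locked,
 * belong to @mapping and are sorted by ->index, as
 * page_cache_tree_delete_batch() expects.
 */
static void evict_locked_pages(struct address_space *mapping,
			       struct page **pages, int nr)
{
	struct pagevec pvec;
	int i;

	pagevec_init(&pvec);
	for (i = 0; i < nr; i++)
		pagevec_add(&pvec, pages[i]);

	/* One tree_lock acquisition covers the whole batch. */
	delete_from_page_cache_batch(mapping, &pvec);

	for (i = 0; i < nr; i++)
		unlock_page(pages[i]);
}
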
@@ -419,20 +519,18 @@ static void __filemap_fdatawait_range(struct address_space *mapping,
 	if (end_byte < start_byte)
 		return;
 
-	pagevec_init(&pvec, 0);
-	while ((index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_WRITEBACK,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+	pagevec_init(&pvec);
+	while (index <= end) {
 		unsigned i;
 
+		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+				end, PAGECACHE_TAG_WRITEBACK);
+		if (!nr_pages)
+			break;
+
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
-			/* until radix tree lookup accepts end_index */
-			if (page->index > end)
-				continue;
-
 			wait_on_page_writeback(page);
 			ClearPageError(page);
 		}
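
The rewritten wait loop leans on pagevec_lookup_range_tag() to clamp the lookup at @end, which is why the old per-page "page->index > end" check and the PAGEVEC_SIZE arithmetic disappear. That helper is not part of this file; presumably it is a thin wrapper that forwards to find_get_pages_range_tag(), roughly as sketched below (an assumption, not part of this diff).

/*
 * Sketch: assumed shape of the pagevec helper (the real one would live
 * in mm/swap.c); it fills the pagevec from the ranged, tagged lookup.
 */
unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
		struct address_space *mapping, pgoff_t *index, pgoff_t end,
		int tag)
{
	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
					    PAGEVEC_SIZE, pvec->pages);
	return pagevec_count(pvec);
}
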
@@ -1754,9 +1852,10 @@ repeat:
 EXPORT_SYMBOL(find_get_pages_contig);
 
 /**
- * find_get_pages_tag - find and return pages that match @tag
+ * find_get_pages_range_tag - find and return pages in given range matching @tag
  * @mapping: the address_space to search
  * @index: the starting page index
+ * @end: The final page index (inclusive)
  * @tag: the tag index
  * @nr_pages: the maximum number of pages
  * @pages: where the resulting pages are placed
@@ -1764,8 +1863,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * Like find_get_pages, except we only return pages which are tagged with
  * @tag. We update @index to index the next page for the traversal.
  */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
+			pgoff_t end, int tag, unsigned int nr_pages,
+			struct page **pages)
 {
 	struct radix_tree_iter iter;
 	void **slot;
@@ -1778,6 +1878,9 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
 				   &iter, *index, tag) {
 		struct page *head, *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1819,18 +1922,28 @@ repeat:
 		}
 
 		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
+		if (++ret == nr_pages) {
+			*index = pages[ret - 1]->index + 1;
+			goto out;
+		}
 	}
 
+	/*
+	 * We come here when we got at @end. We take care to not overflow the
+	 * index @index as it confuses some of the callers. This breaks the
+	 * iteration when there is page at index -1 but that is already broken
+	 * anyway.
+	 */
+	if (end == (pgoff_t)-1)
+		*index = (pgoff_t)-1;
+	else
+		*index = end + 1;
+out:
 	rcu_read_unlock();
 
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-
 	return ret;
 }
-EXPORT_SYMBOL(find_get_pages_tag);
+EXPORT_SYMBOL(find_get_pages_range_tag);
 
 /**
  * find_get_entries_tag - find and return entries that match @tag
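
With the ranged variant exported, existing callers of find_get_pages_tag() do not have to change: the old name can survive as a wrapper that passes (pgoff_t)-1 as @end. That wrapper does not belong to mm/filemap.c; the sketch below shows how the companion header change presumably looks, and is an assumption rather than part of this diff.

/*
 * Sketch: keeping the old entry point on top of the ranged variant
 * (the real wrapper would live in include/linux/pagemap.h).
 */
static inline unsigned find_get_pages_tag(struct address_space *mapping,
			pgoff_t *index, int tag, unsigned int nr_pages,
			struct page **pages)
{
	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
					nr_pages, pages);
}
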
@@ -2159,7 +2272,7 @@ no_cached_page:
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
 		 */
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc(mapping);
 		if (!page) {
 			error = -ENOMEM;
 			goto out;
@@ -2271,7 +2384,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 	int ret;
 
 	do {
-		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return -ENOMEM;
 
@@ -2675,7 +2788,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp);
+		page = __page_cache_alloc(gfp);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
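
The last three hunks drop the __GFP_COLD hint and switch page_cache_alloc_cold() callers to page_cache_alloc(): with the cold-page hint going away there is no longer a separate "cold" allocation path for the page cache. For reference, page_cache_alloc() is presumably the trivial wrapper sketched below (it would be defined in include/linux/pagemap.h; the exact body is an assumption, not part of this diff).

/*
 * Sketch: assumed definition of the surviving allocation helper; the
 * gfp mask comes straight from the mapping, with no __GFP_COLD added.
 */
static inline struct page *page_cache_alloc(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x));
}
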