Diffstat (limited to 'mm/filemap.c')

-rw-r--r--	mm/filemap.c	221
1 file changed, 167 insertions(+), 54 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 594d73fef8b4..923fc2ebd74a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
+#include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			*shadowp = p;
 	}
 	__radix_tree_replace(&mapping->page_tree, node, slot, page,
-			     workingset_update_node, mapping);
+			     workingset_lookup_update(mapping));
 	mapping->nrpages++;
 	return 0;
 }
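
Note: workingset_lookup_update() is introduced by an earlier patch in this
series. From memory it is roughly the macro below (treat the exact form as an
assumption, not part of this diff); it resolves to the workingset_update_node
callback except for DAX and shmem mappings, which manage their radix tree
nodes themselves. The shmem_mapping() test is presumably why this diff adds
the <linux/shmem_fs.h> include above.

	#define workingset_lookup_update(mapping)				\
	({									\
		radix_tree_update_node_t __helper = workingset_update_node;	\
										\
		/* radix tree node reclaim is skipped for DAX and shmem */	\
		if (dax_mapping(mapping) || shmem_mapping(mapping))		\
			__helper = NULL;					\
		__helper;							\
	})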
@@ -162,9 +163,12 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
 		radix_tree_clear_tags(&mapping->page_tree, node, slot);
 		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-				     workingset_update_node, mapping);
+				     workingset_lookup_update(mapping));
 	}
 
+	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
+
 	if (shadow) {
 		mapping->nrexceptional += nr;
 		/*
@@ -178,17 +182,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
 	mapping->nrpages -= nr;
 }
 
-/*
- * Delete a page from the page cache and free it. Caller has to make
- * sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold the mapping's tree_lock.
- */
-void __delete_from_page_cache(struct page *page, void *shadow)
+static void unaccount_page_cache_page(struct address_space *mapping,
+				      struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	int nr = hpage_nr_pages(page);
+	int nr;
 
-	trace_mm_filemap_delete_from_page_cache(page);
 	/*
 	 * if we're uptodate, flush out into the cleancache, otherwise
 	 * invalidate any existing cleancache entries. We can't leave
@@ -224,15 +222,12 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 		}
 	}
 
-	page_cache_tree_delete(mapping, page, shadow);
-
-	page->mapping = NULL;
-	/* Leave page->index set: truncation lookup relies upon it */
-
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (PageHuge(page))
 		return;
 
+	nr = hpage_nr_pages(page);
+
 	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
@@ -243,17 +238,51 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	}
 
 	/*
-	 * At this point page must be either written or cleaned by truncate.
-	 * Dirty page here signals a bug and loss of unwritten data.
+	 * At this point page must be either written or cleaned by
+	 * truncate. Dirty page here signals a bug and loss of
+	 * unwritten data.
 	 *
-	 * This fixes dirty accounting after removing the page entirely but
-	 * leaves PageDirty set: it has no effect for truncated page and
-	 * anyway will be cleared before returning page into buddy allocator.
+	 * This fixes dirty accounting after removing the page entirely
+	 * but leaves PageDirty set: it has no effect for truncated
+	 * page and anyway will be cleared before returning page into
+	 * buddy allocator.
 	 */
 	if (WARN_ON_ONCE(PageDirty(page)))
 		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
 }
 
+/*
+ * Delete a page from the page cache and free it. Caller has to make
+ * sure the page is locked and that nobody else uses it - or that usage
+ * is safe. The caller must hold the mapping's tree_lock.
+ */
+void __delete_from_page_cache(struct page *page, void *shadow)
+{
+	struct address_space *mapping = page->mapping;
+
+	trace_mm_filemap_delete_from_page_cache(page);
+
+	unaccount_page_cache_page(mapping, page);
+	page_cache_tree_delete(mapping, page, shadow);
+}
+
+static void page_cache_free_page(struct address_space *mapping,
+				struct page *page)
+{
+	void (*freepage)(struct page *);
+
+	freepage = mapping->a_ops->freepage;
+	if (freepage)
+		freepage(page);
+
+	if (PageTransHuge(page) && !PageHuge(page)) {
+		page_ref_sub(page, HPAGE_PMD_NR);
+		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+	} else {
+		put_page(page);
+	}
+}
+
 /**
  * delete_from_page_cache - delete page from page cache
  * @page: the page which the kernel is trying to remove from page cache
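
Note: the THP branch in page_cache_free_page() reflects that a transparent
huge page in the page cache is pinned once per sub-page. A worked count (an
assumption drawn from the code above, with HPAGE_PMD_NR == 512 on x86-64): a
cached THP whose only other user is the caller's lookup reference has
page_count() == 512 + 1; page_ref_sub(page, HPAGE_PMD_NR) drops that to 1,
the VM_BUG_ON_PAGE() verifies the caller's reference survives, and the
caller's eventual put_page() frees the page.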
@@ -266,27 +295,98 @@ void delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	unsigned long flags;
-	void (*freepage)(struct page *);
 
 	BUG_ON(!PageLocked(page));
-
-	freepage = mapping->a_ops->freepage;
-
 	spin_lock_irqsave(&mapping->tree_lock, flags);
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 
-	if (freepage)
-		freepage(page);
+	page_cache_free_page(mapping, page);
+}
+EXPORT_SYMBOL(delete_from_page_cache);
 
-	if (PageTransHuge(page) && !PageHuge(page)) {
-		page_ref_sub(page, HPAGE_PMD_NR);
-		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
-	} else {
-		put_page(page);
+/*
+ * page_cache_tree_delete_batch - delete several pages from page cache
+ * @mapping: the mapping to which pages belong
+ * @pvec: pagevec with pages to delete
+ *
+ * The function walks over mapping->page_tree and removes pages passed in @pvec
+ * from the radix tree. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * modified). The function expects only THP head pages to be present in the
+ * @pvec and takes care to delete all corresponding tail pages from the radix
+ * tree as well.
+ *
+ * The function expects mapping->tree_lock to be held.
+ */
+static void
+page_cache_tree_delete_batch(struct address_space *mapping,
+			     struct pagevec *pvec)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	int total_pages = 0;
+	int i = 0, tail_pages = 0;
+	struct page *page;
+	pgoff_t start;
+
+	start = pvec->pages[0]->index;
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+		if (i >= pagevec_count(pvec) && !tail_pages)
+			break;
+		page = radix_tree_deref_slot_protected(slot,
+						       &mapping->tree_lock);
+		if (radix_tree_exceptional_entry(page))
+			continue;
+		if (!tail_pages) {
+			/*
+			 * Some page got inserted in our range? Skip it. We
+			 * have our pages locked so they are protected from
+			 * being removed.
+			 */
+			if (page != pvec->pages[i])
+				continue;
+			WARN_ON_ONCE(!PageLocked(page));
+			if (PageTransHuge(page) && !PageHuge(page))
+				tail_pages = HPAGE_PMD_NR - 1;
+			page->mapping = NULL;
+			/*
+			 * Leave page->index set: truncation lookup relies
+			 * upon it
+			 */
+			i++;
+		} else {
+			tail_pages--;
+		}
+		radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
+		__radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+				workingset_lookup_update(mapping));
+		total_pages++;
 	}
+	mapping->nrpages -= total_pages;
+}
+
+void delete_from_page_cache_batch(struct address_space *mapping,
+				  struct pagevec *pvec)
+{
+	int i;
+	unsigned long flags;
+
+	if (!pagevec_count(pvec))
+		return;
+
+	spin_lock_irqsave(&mapping->tree_lock, flags);
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+
+		unaccount_page_cache_page(mapping, pvec->pages[i]);
+	}
+	page_cache_tree_delete_batch(mapping, pvec);
+	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+	for (i = 0; i < pagevec_count(pvec); i++)
+		page_cache_free_page(mapping, pvec->pages[i]);
 }
-EXPORT_SYMBOL(delete_from_page_cache);
 
 int filemap_check_errors(struct address_space *mapping)
 {
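
Note: a minimal caller sketch for the new batch API (a hypothetical helper,
not part of this diff). Per the contract documented above, the pages must
belong to the mapping, be locked, be sorted by index, and include only THP
head pages; the payoff is one tree_lock round trip for the whole pagevec
instead of one per page.

	static void drop_locked_pages(struct address_space *mapping,
				      struct pagevec *pvec)
	{
		int i;

		/* Unaccounts and unlinks every page under a single tree_lock. */
		delete_from_page_cache_batch(mapping, pvec);

		/* The caller still owns its lookup references: unlock and drop. */
		for (i = 0; i < pagevec_count(pvec); i++)
			unlock_page(pvec->pages[i]);
		pagevec_release(pvec);
	}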
@@ -419,20 +519,18 @@ static void __filemap_fdatawait_range(struct address_space *mapping,
 	if (end_byte < start_byte)
 		return;
 
-	pagevec_init(&pvec, 0);
-	while ((index <= end) &&
-			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_WRITEBACK,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+	pagevec_init(&pvec);
+	while (index <= end) {
 		unsigned i;
 
+		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
+				end, PAGECACHE_TAG_WRITEBACK);
+		if (!nr_pages)
+			break;
+
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
-			/* until radix tree lookup accepts end_index */
-			if (page->index > end)
-				continue;
-
 			wait_on_page_writeback(page);
 			ClearPageError(page);
 		}
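
Note: the rewritten wait loop leans on pagevec_lookup_range_tag() clamping
the lookup at @end and advancing *index past the pages it returns, which is
why the old per-page "page->index > end" filter is gone. A sketch of the
idiom for other callers (illustrative only; mapping, start, and end are
assumed from context, and the dirty tag is an arbitrary example):

	pgoff_t index = start;
	struct pagevec pvec;
	unsigned nr_pages;

	pagevec_init(&pvec);
	while (index <= end) {
		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
						    end, PAGECACHE_TAG_DIRTY);
		if (!nr_pages)
			break;
		/* every pvec.pages[i] lies within [start, end] */
		pagevec_release(&pvec);
	}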
@@ -1754,9 +1852,10 @@ repeat:
 EXPORT_SYMBOL(find_get_pages_contig);
 
 /**
- * find_get_pages_tag - find and return pages that match @tag
+ * find_get_pages_range_tag - find and return pages in given range matching @tag
  * @mapping:	the address_space to search
  * @index:	the starting page index
+ * @end:	The final page index (inclusive)
  * @tag:	the tag index
  * @nr_pages:	the maximum number of pages
  * @pages:	where the resulting pages are placed
@@ -1764,8 +1863,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * Like find_get_pages, except we only return pages which are tagged with
  * @tag.   We update @index to index the next page for the traversal.
  */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
+			pgoff_t end, int tag, unsigned int nr_pages,
+			struct page **pages)
 {
 	struct radix_tree_iter iter;
 	void **slot;
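
Note: existing callers of the unbounded lookup are kept working; elsewhere in
this series find_get_pages_tag() survives as an inline wrapper that passes
the maximum index, roughly as follows (from memory, treat as an assumption):

	static inline unsigned find_get_pages_tag(struct address_space *mapping,
				pgoff_t *index, int tag, unsigned int nr_pages,
				struct page **pages)
	{
		return find_get_pages_range_tag(mapping, index, (pgoff_t)-1,
						tag, nr_pages, pages);
	}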
@@ -1778,6 +1878,9 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
 				   &iter, *index, tag) {
 		struct page *head, *page;
+
+		if (iter.index > end)
+			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1819,18 +1922,28 @@ repeat:
 		}
 
 		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
+		if (++ret == nr_pages) {
+			*index = pages[ret - 1]->index + 1;
+			goto out;
+		}
 	}
 
+	/*
+	 * We come here when we got at @end. We take care to not overflow the
+	 * index @index as it confuses some of the callers. This breaks the
+	 * iteration when there is page at index -1 but that is already broken
+	 * anyway.
+	 */
+	if (end == (pgoff_t)-1)
+		*index = (pgoff_t)-1;
+	else
+		*index = end + 1;
+out:
 	rcu_read_unlock();
 
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-
 	return ret;
 }
-EXPORT_SYMBOL(find_get_pages_tag);
+EXPORT_SYMBOL(find_get_pages_range_tag);
 
 /**
  * find_get_entries_tag - find and return entries that match @tag
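
Note: the clamp before "out:" guards a real wraparound. With a 64-bit
pgoff_t and end == (pgoff_t)-1, the naive "*index = end + 1" computes
0xffffffffffffffff + 1 == 0 (unsigned wraparound), so a caller looping
"while (index <= end)" would restart from index 0 and never terminate;
clamping *index to (pgoff_t)-1 sacrifices only the page-at-index -1 case,
which the comment above notes is already broken.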
@@ -2159,7 +2272,7 @@ no_cached_page:
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
 		 */
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc(mapping);
 		if (!page) {
 			error = -ENOMEM;
 			goto out;
@@ -2271,7 +2384,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 	int ret;
 
 	do {
-		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return -ENOMEM;
 
@@ -2675,7 +2788,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp | __GFP_COLD);
+		page = __page_cache_alloc(gfp);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
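
Note: the three allocation hunks above belong to the removal of cold-page
hints: __GFP_COLD is dropped and page_cache_alloc_cold() goes away, leaving
plain page_cache_alloc(). For reference, the surviving helper is
approximately the following (from include/linux/pagemap.h of this era; treat
the exact form as an assumption):

	static inline struct page *page_cache_alloc(struct address_space *x)
	{
		return __page_cache_alloc(mapping_gfp_mask(x));
	}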