author		Linus Torvalds <torvalds@linux-foundation.org>	2016-01-23 14:13:56 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-23 14:13:56 -0500
commit		20c759ca98468d96d1fff8bd5e6753f458dbbfbd
tree		202bb6951fcb6857128d96154bcde1bddbb22316 /mm
parent		b82dde0230439215b55e545880e90337ee16f51a
parent		114bf37e04d839b555b3dc460b5e6ce156f49cf0
Merge branch 'akpm' (patches from Andrew)
Merge small final update from Andrew Morton:
- DAX feature work: add fsync/msync support
- kfree cleanup, MAINTAINERS update
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
MAINTAINERS: return arch/sh to maintained state, with new maintainers
tree wide: use kvfree() than conditional kfree()/vfree()
dax: never rely on bh.b_dev being set by get_block()
xfs: call dax_pfn_mkwrite() for DAX fsync/msync
ext4: call dax_pfn_mkwrite() for DAX fsync/msync
ext2: call dax_pfn_mkwrite() for DAX fsync/msync
dax: add support for fsync/sync
mm: add find_get_entries_tag()
dax: support dirty DAX entries in radix tree
pmem: add wb_cache_pmem() to the PMEM API
dax: fix conversion of holes to PMDs
dax: fix NULL pointer dereference in __dax_dbg()
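The three "call dax_pfn_mkwrite() for DAX fsync/msync" patches above each wire a .pfn_mkwrite handler into the filesystem's DAX vm_operations_struct, so that a write fault on an already-mapped pfn re-dirties the radix tree entry and a later fsync/msync can find it. A minimal sketch of that wiring under 4.5-era signatures; example_dax_pfn_mkwrite and example_dax_vm_ops are illustrative names, not code from the series:

#include <linux/dax.h>
#include <linux/mm.h>

/* Forward the write-protect fault to DAX so it can re-dirty the entry. */
static int example_dax_pfn_mkwrite(struct vm_area_struct *vma,
				   struct vm_fault *vmf)
{
	return dax_pfn_mkwrite(vma, vmf);
}

static const struct vm_operations_struct example_dax_vm_ops = {
	.pfn_mkwrite	= example_dax_pfn_mkwrite,
};

The real per-filesystem handlers typically also take freeze protection (sb_start_pagefault()) around the call; this sketch omits that.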
Diffstat (limited to 'mm')
 mm/filemap.c    | 91
 mm/percpu.c     | 18
 mm/truncate.c   | 69
 mm/vmscan.c     |  9
 mm/workingset.c |  4
 5 files changed, 141 insertions(+), 50 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 847ee43c2806..2e7c8d980d5e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -11,6 +11,7 @@
  */
 #include <linux/export.h>
 #include <linux/compiler.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
 	__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
 	if (shadow) {
-		mapping->nrshadows++;
+		mapping->nrexceptional++;
 		/*
-		 * Make sure the nrshadows update is committed before
+		 * Make sure the nrexceptional update is committed before
 		 * the nrpages update so that final truncate racing
 		 * with reclaim does not see both counters 0 at the
 		 * same time and miss a shadow entry.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
 	int err = 0;
 
+	if (dax_mapping(mapping) && mapping->nrexceptional) {
+		err = dax_writeback_mapping_range(mapping, lstart, lend);
+		if (err)
+			return err;
+	}
+
 	if (mapping->nrpages) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 		if (!radix_tree_exceptional_entry(p))
 			return -EEXIST;
+
+		if (WARN_ON(dax_mapping(mapping)))
+			return -EINVAL;
+
 		if (shadowp)
 			*shadowp = p;
-		mapping->nrshadows--;
+		mapping->nrexceptional--;
 		if (node)
 			workingset_node_shadows_dec(node);
 	}
@@ -1245,9 +1256,9 @@ repeat:
 		if (radix_tree_deref_retry(page))
 			goto restart;
 		/*
-		 * A shadow entry of a recently evicted page,
-		 * or a swap entry from shmem/tmpfs. Return
-		 * it without attempting to raise page count.
+		 * A shadow entry of a recently evicted page, a swap
+		 * entry from shmem/tmpfs or a DAX entry. Return it
+		 * without attempting to raise page count.
 		 */
 		goto export;
 	}
@@ -1494,6 +1505,74 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:	the address_space to search
+ * @start:	the starting page cache index
+ * @tag:	the tag index
+ * @nr_entries:	the maximum number of entries
+ * @entries:	where the resulting entries are placed
+ * @indices:	the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+			int tag, unsigned int nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	void **slot;
+	unsigned int ret = 0;
+	struct radix_tree_iter iter;
+
+	if (!nr_entries)
+		return 0;
+
+	rcu_read_lock();
+restart:
+	radix_tree_for_each_tagged(slot, &mapping->page_tree,
+				   &iter, start, tag) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+
+			/*
+			 * A shadow entry of a recently evicted page, a swap
+			 * entry from shmem/tmpfs or a DAX entry. Return it
+			 * without attempting to raise page count.
+			 */
+			goto export;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *slot)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = iter.index;
+		entries[ret] = page;
+		if (++ret == nr_entries)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
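The new helper above is the lookup side of the DAX fsync design: dax.c tags dirty entries in the radix tree and later harvests them by tag. A hedged sketch of a caller in the spirit of dax_writeback_mapping_range(); walk_towrite_entries() and its flush step are hypothetical, while find_get_entries_tag(), PAGEVEC_SIZE and PAGECACHE_TAG_TOWRITE come from the series and existing kernel headers:

#include <linux/pagemap.h>
#include <linux/pagevec.h>

/* Walk all TOWRITE-tagged entries in [start, end]; flushing is left abstract. */
static void walk_towrite_entries(struct address_space *mapping,
				 pgoff_t start, pgoff_t end)
{
	struct page *entries[PAGEVEC_SIZE];
	pgoff_t indices[PAGEVEC_SIZE];
	unsigned int i, nr;

	while (start <= end) {
		nr = find_get_entries_tag(mapping, start,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				entries, indices);
		if (!nr)
			break;
		for (i = 0; i < nr; i++) {
			if (indices[i] > end)
				return;
			/*
			 * Flush the DAX entry at indices[i] here.  DAX
			 * entries are exceptional, so the lookup took no
			 * page reference and nothing needs releasing.
			 */
		}
		start = indices[nr - 1] + 1;
	}
}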
diff --git a/mm/percpu.c b/mm/percpu.c
index 8a943b97a053..998607adf6eb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
 /**
  * pcpu_mem_free - free memory
  * @ptr: memory to free
- * @size: size of the area
  *
  * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
  */
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
 {
-	if (size <= PAGE_SIZE)
-		kfree(ptr);
-	else
-		vfree(ptr);
+	kvfree(ptr);
 }
 
 /**
@@ -463,8 +459,8 @@ out_unlock:
 	 * pcpu_mem_free() might end up calling vfree() which uses
 	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
 	 */
-	pcpu_mem_free(old, old_size);
-	pcpu_mem_free(new, new_size);
+	pcpu_mem_free(old);
+	pcpu_mem_free(new);
 
 	return 0;
 }
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
 						sizeof(chunk->map[0]));
 	if (!chunk->map) {
-		pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+		pcpu_mem_free(chunk);
 		return NULL;
 	}
 
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
 {
 	if (!chunk)
 		return;
-	pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
-	pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+	pcpu_mem_free(chunk->map);
+	pcpu_mem_free(chunk);
 }
 
 /**
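The pcpu_mem_free() conversion works because kvfree() picks the right deallocator from the pointer itself rather than from a size hint. Its mm/util.c implementation is roughly the following (reproduced from memory, so treat it as a sketch):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Pick kfree() or vfree() by inspecting where the address lives. */
void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}

Note that the comment retained in the hunk above still applies: kvfree() may end up in vfree(), so the callers keep freeing outside pcpu_lock.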
diff --git a/mm/truncate.c b/mm/truncate.c
index 76e35ad97102..e3ee0e27cd17 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/backing-dev.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
 		return;
 
 	spin_lock_irq(&mapping->tree_lock);
-	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked. These unlocked entries
-	 * need verification under the tree lock.
-	 */
-	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
-		goto unlock;
-	if (*slot != entry)
-		goto unlock;
-	radix_tree_replace_slot(slot, NULL);
-	mapping->nrshadows--;
-	if (!node)
-		goto unlock;
-	workingset_node_shadows_dec(node);
-	/*
-	 * Don't track node without shadow entries.
-	 *
-	 * Avoid acquiring the list_lru lock if already untracked.
-	 * The list_empty() test is safe as node->private_list is
-	 * protected by mapping->tree_lock.
-	 */
-	if (!workingset_node_shadows(node) &&
-	    !list_empty(&node->private_list))
-		list_lru_del(&workingset_shadow_nodes, &node->private_list);
-	__radix_tree_delete_node(&mapping->page_tree, node);
+
+	if (dax_mapping(mapping)) {
+		if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+			mapping->nrexceptional--;
+	} else {
+		/*
+		 * Regular page slots are stabilized by the page lock even
+		 * without the tree itself locked. These unlocked entries
+		 * need verification under the tree lock.
+		 */
+		if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+					&slot))
+			goto unlock;
+		if (*slot != entry)
+			goto unlock;
+		radix_tree_replace_slot(slot, NULL);
+		mapping->nrexceptional--;
+		if (!node)
+			goto unlock;
+		workingset_node_shadows_dec(node);
+		/*
+		 * Don't track node without shadow entries.
+		 *
+		 * Avoid acquiring the list_lru lock if already untracked.
+		 * The list_empty() test is safe as node->private_list is
+		 * protected by mapping->tree_lock.
+		 */
+		if (!workingset_node_shadows(node) &&
+		    !list_empty(&node->private_list))
+			list_lru_del(&workingset_shadow_nodes,
+					&node->private_list);
+		__radix_tree_delete_node(&mapping->page_tree, node);
+	}
 unlock:
 	spin_unlock_irq(&mapping->tree_lock);
 }
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	int i;
 
 	cleancache_invalidate_inode(mapping);
-	if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
 		return;
 
 	/* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
  */
 void truncate_inode_pages_final(struct address_space *mapping)
 {
-	unsigned long nrshadows;
+	unsigned long nrexceptional;
 	unsigned long nrpages;
 
 	/*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
 
 	/*
 	 * When reclaim installs eviction entries, it increases
-	 * nrshadows first, then decreases nrpages. Make sure we see
+	 * nrexceptional first, then decreases nrpages. Make sure we see
 	 * this in the right order or we might miss an entry.
 	 */
 	nrpages = mapping->nrpages;
 	smp_rmb();
-	nrshadows = mapping->nrshadows;
+	nrexceptional = mapping->nrexceptional;
 
-	if (nrpages || nrshadows) {
+	if (nrpages || nrexceptional) {
 		/*
 		 * As truncation uses a lockless tree lookup, cycle
 		 * the tree lock to make sure any ongoing tree
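The DAX branch added to clear_exceptional_entry() leans on a property of radix_tree_delete_item(): it removes the slot only when the slot still holds the expected entry, which is why the separate lookup-and-compare dance used for shadow entries is unnecessary there. A small illustrative wrapper over that compare-and-delete idiom (delete_if_unchanged() is a hypothetical name, not part of the series):

#include <linux/fs.h>
#include <linux/radix-tree.h>

/*
 * Delete @entry at @index only if the tree still holds exactly @entry
 * there; returns true when the deletion actually happened.
 */
static bool delete_if_unchanged(struct address_space *mapping,
				pgoff_t index, void *entry)
{
	void *deleted;

	deleted = radix_tree_delete_item(&mapping->page_tree, index, entry);
	return deleted == entry;
}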
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd620b65db52..eb3dd37ccd7c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
 #include <linux/oom.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
+#include <linux/dax.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		 * inode reclaim needs to empty out the radix tree or
 		 * the nodes are lost. Don't plant shadows behind its
 		 * back.
+		 *
+		 * We also don't store shadows for DAX mappings because the
+		 * only page cache pages found in these are zero pages
+		 * covering holes, and because we don't want to mix DAX
+		 * exceptional entries and shadow exceptional entries in the
+		 * same page_tree.
 		 */
 		if (reclaimed && page_is_file_cache(page) &&
-		    !mapping_exiting(mapping))
+		    !mapping_exiting(mapping) && !dax_mapping(mapping))
 			shadow = workingset_eviction(mapping, page);
 		__delete_from_page_cache(page, shadow, memcg);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
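dax_mapping(), tested in the hunk above, is introduced earlier in this series ("dax: support dirty DAX entries in radix tree") as a thin inline helper in <linux/dax.h>; approximately:

#include <linux/fs.h>

/* Approximation of the <linux/dax.h> helper: a mapping is DAX iff its inode is. */
static inline bool dax_mapping(struct address_space *mapping)
{
	return mapping->host && IS_DAX(mapping->host);
}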
diff --git a/mm/workingset.c b/mm/workingset.c
index aa017133744b..61ead9e5549d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 			node->slots[i] = NULL;
 			BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
 			node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-			BUG_ON(!mapping->nrshadows);
-			mapping->nrshadows--;
+			BUG_ON(!mapping->nrexceptional);
+			mapping->nrexceptional--;
 		}
 	}
 	BUG_ON(node->count);