aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Hugh Dickins <hughd@google.com>, 2012-05-29 18:06:38 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2012-05-29 19:22:22 -0400
commit: bde05d1ccd512696b09db9dd2e5f33ad19152605 (patch)
tree: affa2c836136cac6ec0e503ce8996670d385ebbb /mm
parent: 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199 (diff)
shmem: replace page if mapping excludes its zone
The GMA500 GPU driver uses GEM shmem objects, but with a new twist: the backing RAM has to be below 4GB. Not a problem while the boards supported only 4GB: but now Intel's D2700MUD boards support 8GB, and their GMA3600 is managed by the GMA500 driver.

shmem/tmpfs has never pretended to support hardware restrictions on the backing memory, but it might have appeared to do so before v3.1, and even now it works fine until a page is swapped out then back in. When read_cache_page_gfp() supplied a freshly allocated page for copy, that compensated for whatever choice might have been made by earlier swapin readahead; but swapoff was likely to destroy the illusion.

We'd like to continue to support GMA500, so now add a new shmem_should_replace_page() check on the zone when about to move a page from swapcache to filecache (in swapin and swapoff cases), with shmem_replace_page() to allocate and substitute a suitable page (given gma500/gem.c's mapping_set_gfp_mask GFP_KERNEL | __GFP_DMA32).

This does involve a minor extension to mem_cgroup_replace_page_cache() (the page may or may not have already been charged); and I've removed a comment and call to mem_cgroup_uncharge_cache_page(), which in fact is always a no-op while PageSwapCache.

Also removed optimization of an unlikely path in shmem_getpage_gfp(), now that we need to check PageSwapCache more carefully (a racing caller might already have made the copy). And at one point shmem_unuse_inode() needs to use the hitherto private page_swapcount(), to guard against racing with inode eviction.

It would make sense to extend shmem_should_replace_page(), to cover cpuset and NUMA mempolicy restrictions too, but set that aside for now: needs a cleanup of shmem mempolicy handling, and more testing, and ought to handle swap faults in do_swap_page() as well as shmem.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Stephane Marchesin <marcheu@chromium.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c |  17
-rw-r--r--  mm/shmem.c      | 141
-rw-r--r--  mm/swapfile.c   |   2
3 files changed, 136 insertions, 24 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4f71219cc53e..d7ce417cae7c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3373,7 +3373,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3373void mem_cgroup_replace_page_cache(struct page *oldpage, 3373void mem_cgroup_replace_page_cache(struct page *oldpage,
3374 struct page *newpage) 3374 struct page *newpage)
3375{ 3375{
3376 struct mem_cgroup *memcg; 3376 struct mem_cgroup *memcg = NULL;
3377 struct page_cgroup *pc; 3377 struct page_cgroup *pc;
3378 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; 3378 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3379 3379
@@ -3383,11 +3383,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
3383 pc = lookup_page_cgroup(oldpage); 3383 pc = lookup_page_cgroup(oldpage);
3384 /* fix accounting on old pages */ 3384 /* fix accounting on old pages */
3385 lock_page_cgroup(pc); 3385 lock_page_cgroup(pc);
3386 memcg = pc->mem_cgroup; 3386 if (PageCgroupUsed(pc)) {
3387 mem_cgroup_charge_statistics(memcg, false, -1); 3387 memcg = pc->mem_cgroup;
3388 ClearPageCgroupUsed(pc); 3388 mem_cgroup_charge_statistics(memcg, false, -1);
3389 ClearPageCgroupUsed(pc);
3390 }
3389 unlock_page_cgroup(pc); 3391 unlock_page_cgroup(pc);
3390 3392
3393 /*
3394 * When called from shmem_replace_page(), in some cases the
3395 * oldpage has already been charged, and in some cases not.
3396 */
3397 if (!memcg)
3398 return;
3399
3391 if (PageSwapBacked(oldpage)) 3400 if (PageSwapBacked(oldpage))
3392 type = MEM_CGROUP_CHARGE_TYPE_SHMEM; 3401 type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3393 3402
diff --git a/mm/shmem.c b/mm/shmem.c
index be5af34a070d..db72d8e44ec6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -103,6 +103,9 @@ static unsigned long shmem_default_max_inodes(void)
103} 103}
104#endif 104#endif
105 105
106static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
107static int shmem_replace_page(struct page **pagep, gfp_t gfp,
108 struct shmem_inode_info *info, pgoff_t index);
106static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, 109static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
107 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); 110 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
108 111
@@ -604,12 +607,13 @@ static void shmem_evict_inode(struct inode *inode)
604 * If swap found in inode, free it and move page from swapcache to filecache. 607 * If swap found in inode, free it and move page from swapcache to filecache.
605 */ 608 */
606static int shmem_unuse_inode(struct shmem_inode_info *info, 609static int shmem_unuse_inode(struct shmem_inode_info *info,
607 swp_entry_t swap, struct page *page) 610 swp_entry_t swap, struct page **pagep)
608{ 611{
609 struct address_space *mapping = info->vfs_inode.i_mapping; 612 struct address_space *mapping = info->vfs_inode.i_mapping;
610 void *radswap; 613 void *radswap;
611 pgoff_t index; 614 pgoff_t index;
612 int error; 615 gfp_t gfp;
616 int error = 0;
613 617
614 radswap = swp_to_radix_entry(swap); 618 radswap = swp_to_radix_entry(swap);
615 index = radix_tree_locate_item(&mapping->page_tree, radswap); 619 index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +629,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
625 if (shmem_swaplist.next != &info->swaplist) 629 if (shmem_swaplist.next != &info->swaplist)
626 list_move_tail(&shmem_swaplist, &info->swaplist); 630 list_move_tail(&shmem_swaplist, &info->swaplist);
627 631
632 gfp = mapping_gfp_mask(mapping);
633 if (shmem_should_replace_page(*pagep, gfp)) {
634 mutex_unlock(&shmem_swaplist_mutex);
635 error = shmem_replace_page(pagep, gfp, info, index);
636 mutex_lock(&shmem_swaplist_mutex);
637 /*
638 * We needed to drop mutex to make that restrictive page
639 * allocation; but the inode might already be freed by now,
640 * and we cannot refer to inode or mapping or info to check.
641 * However, we do hold page lock on the PageSwapCache page,
642 * so can check if that still has our reference remaining.
643 */
644 if (!page_swapcount(*pagep))
645 error = -ENOENT;
646 }
647
628 /* 648 /*
629 * We rely on shmem_swaplist_mutex, not only to protect the swaplist, 649 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
630 * but also to hold up shmem_evict_inode(): so inode cannot be freed 650 * but also to hold up shmem_evict_inode(): so inode cannot be freed
631 * beneath us (pagelock doesn't help until the page is in pagecache). 651 * beneath us (pagelock doesn't help until the page is in pagecache).
632 */ 652 */
633 error = shmem_add_to_page_cache(page, mapping, index, 653 if (!error)
654 error = shmem_add_to_page_cache(*pagep, mapping, index,
634 GFP_NOWAIT, radswap); 655 GFP_NOWAIT, radswap);
635 /* which does mem_cgroup_uncharge_cache_page on error */
636
637 if (error != -ENOMEM) { 656 if (error != -ENOMEM) {
638 /* 657 /*
639 * Truncation and eviction use free_swap_and_cache(), which 658 * Truncation and eviction use free_swap_and_cache(), which
640 * only does trylock page: if we raced, best clean up here. 659 * only does trylock page: if we raced, best clean up here.
641 */ 660 */
642 delete_from_swap_cache(page); 661 delete_from_swap_cache(*pagep);
643 set_page_dirty(page); 662 set_page_dirty(*pagep);
644 if (!error) { 663 if (!error) {
645 spin_lock(&info->lock); 664 spin_lock(&info->lock);
646 info->swapped--; 665 info->swapped--;
@@ -660,7 +679,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
660 struct list_head *this, *next; 679 struct list_head *this, *next;
661 struct shmem_inode_info *info; 680 struct shmem_inode_info *info;
662 int found = 0; 681 int found = 0;
663 int error; 682 int error = 0;
683
684 /*
685 * There's a faint possibility that swap page was replaced before
686 * caller locked it: it will come back later with the right page.
687 */
688 if (unlikely(!PageSwapCache(page)))
689 goto out;
664 690
665 /* 691 /*
666 * Charge page using GFP_KERNEL while we can wait, before taking 692 * Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +702,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
676 list_for_each_safe(this, next, &shmem_swaplist) { 702 list_for_each_safe(this, next, &shmem_swaplist) {
677 info = list_entry(this, struct shmem_inode_info, swaplist); 703 info = list_entry(this, struct shmem_inode_info, swaplist);
678 if (info->swapped) 704 if (info->swapped)
679 found = shmem_unuse_inode(info, swap, page); 705 found = shmem_unuse_inode(info, swap, &page);
680 else 706 else
681 list_del_init(&info->swaplist); 707 list_del_init(&info->swaplist);
682 cond_resched(); 708 cond_resched();
@@ -685,8 +711,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
685 } 711 }
686 mutex_unlock(&shmem_swaplist_mutex); 712 mutex_unlock(&shmem_swaplist_mutex);
687 713
688 if (!found)
689 mem_cgroup_uncharge_cache_page(page);
690 if (found < 0) 714 if (found < 0)
691 error = found; 715 error = found;
692out: 716out:
@@ -856,6 +880,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
856#endif 880#endif
857 881
858/* 882/*
883 * When a page is moved from swapcache to shmem filecache (either by the
884 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
885 * shmem_unuse_inode()), it may have been read in earlier from swap, in
886 * ignorance of the mapping it belongs to. If that mapping has special
887 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
888 * we may need to copy to a suitable page before moving to filecache.
889 *
890 * In a future release, this may well be extended to respect cpuset and
891 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
892 * but for now it is a simple matter of zone.
893 */
894static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
895{
896 return page_zonenum(page) > gfp_zone(gfp);
897}
898
899static int shmem_replace_page(struct page **pagep, gfp_t gfp,
900 struct shmem_inode_info *info, pgoff_t index)
901{
902 struct page *oldpage, *newpage;
903 struct address_space *swap_mapping;
904 pgoff_t swap_index;
905 int error;
906
907 oldpage = *pagep;
908 swap_index = page_private(oldpage);
909 swap_mapping = page_mapping(oldpage);
910
911 /*
912 * We have arrived here because our zones are constrained, so don't
913 * limit chance of success by further cpuset and node constraints.
914 */
915 gfp &= ~GFP_CONSTRAINT_MASK;
916 newpage = shmem_alloc_page(gfp, info, index);
917 if (!newpage)
918 return -ENOMEM;
919 VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
920
921 *pagep = newpage;
922 page_cache_get(newpage);
923 copy_highpage(newpage, oldpage);
924
925 VM_BUG_ON(!PageLocked(oldpage));
926 __set_page_locked(newpage);
927 VM_BUG_ON(!PageUptodate(oldpage));
928 SetPageUptodate(newpage);
929 VM_BUG_ON(!PageSwapBacked(oldpage));
930 SetPageSwapBacked(newpage);
931 VM_BUG_ON(!swap_index);
932 set_page_private(newpage, swap_index);
933 VM_BUG_ON(!PageSwapCache(oldpage));
934 SetPageSwapCache(newpage);
935
936 /*
937 * Our caller will very soon move newpage out of swapcache, but it's
938 * a nice clean interface for us to replace oldpage by newpage there.
939 */
940 spin_lock_irq(&swap_mapping->tree_lock);
941 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
942 newpage);
943 __inc_zone_page_state(newpage, NR_FILE_PAGES);
944 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
945 spin_unlock_irq(&swap_mapping->tree_lock);
946 BUG_ON(error);
947
948 mem_cgroup_replace_page_cache(oldpage, newpage);
949 lru_cache_add_anon(newpage);
950
951 ClearPageSwapCache(oldpage);
952 set_page_private(oldpage, 0);
953
954 unlock_page(oldpage);
955 page_cache_release(oldpage);
956 page_cache_release(oldpage);
957 return 0;
958}
959
960/*
859 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate 961 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
860 * 962 *
861 * If we allocate a new one we do not mark it dirty. That's up to the 963 * If we allocate a new one we do not mark it dirty. That's up to the
@@ -923,19 +1025,20 @@ repeat:
923 1025
924 /* We have to do this with page locked to prevent races */ 1026 /* We have to do this with page locked to prevent races */
925 lock_page(page); 1027 lock_page(page);
1028 if (!PageSwapCache(page) || page->mapping) {
1029 error = -EEXIST; /* try again */
1030 goto failed;
1031 }
926 if (!PageUptodate(page)) { 1032 if (!PageUptodate(page)) {
927 error = -EIO; 1033 error = -EIO;
928 goto failed; 1034 goto failed;
929 } 1035 }
930 wait_on_page_writeback(page); 1036 wait_on_page_writeback(page);
931 1037
932 /* Someone may have already done it for us */ 1038 if (shmem_should_replace_page(page, gfp)) {
933 if (page->mapping) { 1039 error = shmem_replace_page(&page, gfp, info, index);
934 if (page->mapping == mapping && 1040 if (error)
935 page->index == index) 1041 goto failed;
936 goto done;
937 error = -EEXIST;
938 goto failed;
939 } 1042 }
940 1043
941 error = mem_cgroup_cache_charge(page, current->mm, 1044 error = mem_cgroup_cache_charge(page, current->mm,
@@ -998,7 +1101,7 @@ repeat:
998 if (sgp == SGP_DIRTY) 1101 if (sgp == SGP_DIRTY)
999 set_page_dirty(page); 1102 set_page_dirty(page);
1000 } 1103 }
1001done: 1104
1002 /* Perhaps the file has been truncated since we checked */ 1105 /* Perhaps the file has been truncated since we checked */
1003 if (sgp != SGP_WRITE && 1106 if (sgp != SGP_WRITE &&
1004 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { 1107 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..b0c86e92f42c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
601 * This does not give an exact answer when swap count is continued, 601 * This does not give an exact answer when swap count is continued,
602 * but does include the high COUNT_CONTINUED flag to allow for that. 602 * but does include the high COUNT_CONTINUED flag to allow for that.
603 */ 603 */
604static inline int page_swapcount(struct page *page) 604int page_swapcount(struct page *page)
605{ 605{
606 int count = 0; 606 int count = 0;
607 struct swap_info_struct *p; 607 struct swap_info_struct *p;