about | summary | refs | log | tree | commit | diff | stats
path: root/mm/shmem.c
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2008-02-05 01:28:40 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-05 12:44:14 -0500
commit: c4cc6d07b2f465fbf5efd99bbe772a49c515f3f2 (patch)
tree: 0aec353fce4ee6cd16e3051ecfdf55e8022aaa5a /mm/shmem.c
parent: 75897d60a54ccee94253312107f941a83b5077cb (diff)
swapin_readahead: excise NUMA bogosity
For three years swapin_readahead has been cluttered with fanciful CONFIG_NUMA code, advancing addr, and stepping on to the next vma at the boundary, to line up the mempolicy for each page allocation.

It _might_ be a good idea to allocate swap more according to vma layout; but the fact is, that's not how we do it at all, 2.6 even less than 2.4: swap is allocated as needed for pages as they sink to the bottom of the inactive LRUs. Sometimes that may match vma layout, but not so often that it's worth going to these misleading vma->vm_next lengths: rip all that out.

Originally I intended to retain the incrementation of addr, but correct its initial value: valid_swaphandles generally supplies an offset below the target addr (this is readaround rather than readahead), but addr has not been adjusted accordingly, so in the interleave case it has usually been allocating the target page from the "wrong" node (though that may not matter very much).

But look at the equivalent shmem_swapin code: either by oversight or by design, though it has all the apparatus for choosing a new mempolicy per page, it uses the same idx throughout, choosing the same mempolicy and interleave node for each page of the cluster.

Which is actually a much better strategy: each node has its own LRUs and its own kswapd, so if you're betting on any particular relationship between swap and node, the best bet is that nearby swap entries belong to pages from the same node - even when the mempolicy of the target page is to interleave. And examining a map of nodes corresponding to swap entries on a numa=fake system bears this out. (We could later tweak swap allocation to make it even more likely, but this patch is merely about removing cruft.)

So, neither adjust nor increment addr in swapin_readahead, and then shmem_swapin can use it too; the pseudo-vma to pass policy need only be set up once per cluster, and so few fields of pvma are used, let's skip the memset - from shmem_alloc_page also.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- mm/shmem.c | 43
1 files changed, 12 insertions, 31 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 51b3d6ccddab..88c6685f16b7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1025,53 +1025,34 @@ out:
1025 return err; 1025 return err;
1026} 1026}
1027 1027
1028static struct page *shmem_swapin_async(struct shared_policy *p, 1028static struct page *shmem_swapin(struct shmem_inode_info *info,
1029 swp_entry_t entry, unsigned long idx) 1029 swp_entry_t entry, unsigned long idx)
1030{ 1030{
1031 struct page *page;
1032 struct vm_area_struct pvma; 1031 struct vm_area_struct pvma;
1032 struct page *page;
1033 1033
1034 /* Create a pseudo vma that just contains the policy */ 1034 /* Create a pseudo vma that just contains the policy */
1035 memset(&pvma, 0, sizeof(struct vm_area_struct)); 1035 pvma.vm_start = 0;
1036 pvma.vm_end = PAGE_SIZE;
1037 pvma.vm_pgoff = idx; 1036 pvma.vm_pgoff = idx;
1038 pvma.vm_policy = mpol_shared_policy_lookup(p, idx); 1037 pvma.vm_ops = NULL;
1038 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
1039 swapin_readahead(entry, 0, &pvma);
1039 page = read_swap_cache_async(entry, &pvma, 0); 1040 page = read_swap_cache_async(entry, &pvma, 0);
1040 mpol_free(pvma.vm_policy); 1041 mpol_free(pvma.vm_policy);
1041 return page; 1042 return page;
1042} 1043}
1043 1044
1044static struct page *shmem_swapin(struct shmem_inode_info *info, 1045static struct page *shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
1045 swp_entry_t entry, unsigned long idx) 1046 unsigned long idx)
1046{
1047 struct shared_policy *p = &info->policy;
1048 int i, num;
1049 struct page *page;
1050 unsigned long offset;
1051
1052 num = valid_swaphandles(entry, &offset);
1053 for (i = 0; i < num; offset++, i++) {
1054 page = shmem_swapin_async(p,
1055 swp_entry(swp_type(entry), offset), idx);
1056 if (!page)
1057 break;
1058 page_cache_release(page);
1059 }
1060 lru_add_drain(); /* Push any new pages onto the LRU now */
1061 return shmem_swapin_async(p, entry, idx);
1062}
1063
1064static struct page *
1065shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
1066 unsigned long idx)
1067{ 1047{
1068 struct vm_area_struct pvma; 1048 struct vm_area_struct pvma;
1069 struct page *page; 1049 struct page *page;
1070 1050
1071 memset(&pvma, 0, sizeof(struct vm_area_struct)); 1051 /* Create a pseudo vma that just contains the policy */
1072 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); 1052 pvma.vm_start = 0;
1073 pvma.vm_pgoff = idx; 1053 pvma.vm_pgoff = idx;
1074 pvma.vm_end = PAGE_SIZE; 1054 pvma.vm_ops = NULL;
1055 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
1075 page = alloc_page_vma(gfp, &pvma, 0); 1056 page = alloc_page_vma(gfp, &pvma, 0);
1076 mpol_free(pvma.vm_policy); 1057 mpol_free(pvma.vm_policy);
1077 return page; 1058 return page;