author     Andrea Arcangeli <aarcange@redhat.com>          2011-01-13 18:46:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:41 -0500
commit     3f04f62f90d46a82dd73027c5fd7a15daed5c33d (patch)
tree       c267f1bdde3f3a7b2a8b52347fa5ad6053cff435
parent     bae9c19bf12bb2a914a8e530270f41d36cc87c63 (diff)
thp: split_huge_page paging
Paging logic that splits the page before it is unmapped and added to swap
to ensure backwards compatibility with the legacy swap code. Eventually
swap should natively pageout the hugepages to increase performance and
decrease seeking and fragmentation of swap space. swapoff can just skip
over huge pmd as they cannot be part of swap yet. In add_to_swap be
careful to split the page only if we got a valid swap entry so we don't
split hugepages with a full swap.

In theory we could split pages before isolating them during the lru scan,
but for khugepaged to be safe, I'm relying on either mmap_sem write mode,
or PG_lock taken, so split_huge_page has to run either with mmap_sem
read/write mode or PG_lock taken. Calling it from isolate_lru_page would
make locking more complicated, in addition to that split_huge_page would
deadlock if called by __isolate_lru_page because it has to take the lru
lock to add the tail pages.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  mm/memory-failure.c  2
-rw-r--r--  mm/rmap.c            1
-rw-r--r--  mm/swap_state.c      6
-rw-r--r--  mm/swapfile.c        2
4 files changed, 11 insertions(+), 0 deletions(-)
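
The mm/swap_state.c hunk below is the "split the page only if we got a valid
swap entry" rule from the commit message: the swap entry is allocated first,
and the huge page is only split once a valid entry is in hand, so a full swap
device never triggers a needless split. As a rough, simplified sketch of the
resulting add_to_swap() flow (illustrative only, not the committed code;
error paths unrelated to THP are omitted):

    int add_to_swap(struct page *page)
    {
            swp_entry_t entry;

            VM_BUG_ON(!PageLocked(page));    /* split_huge_page() below relies on PG_lock */

            entry = get_swap_page();         /* may fail when swap is full */
            if (!entry.val)
                    return 0;                /* no entry, so the huge page is left intact */

            if (unlikely(PageTransHuge(page)))
                    if (unlikely(split_huge_page(page))) {
                            swapcache_free(entry, NULL);    /* split failed: give the entry back */
                            return 0;
                    }

            /* ... add the now order-0 page to the swap cache as before ... */
            return 1;
    }
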
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2323a8039a98..6a283cc9317c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -386,6 +386,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
         struct task_struct *tsk;
         struct anon_vma *av;
 
+        if (unlikely(split_huge_page(page)))
+                return;
         read_lock(&tasklist_lock);
         av = page_lock_anon_vma(page);
         if (av == NULL) /* Not actually mapped anymore */
diff --git a/mm/rmap.c b/mm/rmap.c
index c95d2ba27a0b..a3197a8a295b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1400,6 +1400,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
         int ret;
 
         BUG_ON(!PageLocked(page));
+        BUG_ON(PageTransHuge(page));
 
         if (unlikely(PageKsm(page)))
                 ret = try_to_unmap_ksm(page, flags);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e10f5833167f..5c8cfabbc9bc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -157,6 +157,12 @@ int add_to_swap(struct page *page)
         if (!entry.val)
                 return 0;
 
+        if (unlikely(PageTransHuge(page)))
+                if (unlikely(split_huge_page(page))) {
+                        swapcache_free(entry, NULL);
+                        return 0;
+                }
+
         /*
          * Radix-tree node allocations from PF_MEMALLOC contexts could
          * completely exhaust the page allocator.  __GFP_NOMEMALLOC
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b6adcfbf6f48..07a458d72fa8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -964,6 +964,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
+                if (unlikely(pmd_trans_huge(*pmd)))
+                        continue;
                 if (pmd_none_or_clear_bad(pmd))
                         continue;
                 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);