diff options
author | Hugh Dickins <hugh.dickins@tiscali.co.uk> | 2009-12-14 20:59:31 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-15 11:53:20 -0500 |
commit | e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 (patch) | |
tree | df4324273856e06b8277b7e4a0fa9289eb8e6385 /mm/migrate.c | |
parent | 407f9c8b0889ced1dbe2f9157e4e60c61329d5c9 (diff) |
ksm: rmap_walk to remove_migration_ptes
A side-effect of making ksm pages swappable is that they have to be placed
on the LRUs: which then exposes them to isolate_lru_page() and hence to
page migration.
Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and
rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c. Perhaps some
consolidation with existing code is possible, but don't attempt that yet
(try_to_unmap needs to handle nonlinears, but migration pte removal does
not).
rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like
remove_anon_migration_ptes() which it replaces, avoids calling
page_lock_anon_vma(), because that includes a page_mapped() test which
fails when all migration ptes are in place. That was valid when NUMA page
migration was introduced (holding mmap_sem provided the missing guarantee
that anon_vma's slab had not already been destroyed), but I believe not
valid in the memory hotremove case added since.
For now do the same as before, and consider the best way to fix that
unlikely race later on. When fixed, we can probably use rmap_walk() on
hwpoisoned ksm pages too: for now, they remain among hwpoison's various
exceptions (its PageKsm test comes before the page is locked, but its
page_lock_anon_vma fails safely if an anon gets upgraded).
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 85 |
1 files changed, 18 insertions, 67 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 367272d04423..0b714747c028 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/mm_inline.h> | 21 | #include <linux/mm_inline.h> |
22 | #include <linux/nsproxy.h> | 22 | #include <linux/nsproxy.h> |
23 | #include <linux/pagevec.h> | 23 | #include <linux/pagevec.h> |
24 | #include <linux/ksm.h> | ||
24 | #include <linux/rmap.h> | 25 | #include <linux/rmap.h> |
25 | #include <linux/topology.h> | 26 | #include <linux/topology.h> |
26 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
@@ -78,8 +79,8 @@ int putback_lru_pages(struct list_head *l) | |||
78 | /* | 79 | /* |
79 | * Restore a potential migration pte to a working pte entry | 80 | * Restore a potential migration pte to a working pte entry |
80 | */ | 81 | */ |
81 | static void remove_migration_pte(struct vm_area_struct *vma, | 82 | static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, |
82 | struct page *old, struct page *new) | 83 | unsigned long addr, void *old) |
83 | { | 84 | { |
84 | struct mm_struct *mm = vma->vm_mm; | 85 | struct mm_struct *mm = vma->vm_mm; |
85 | swp_entry_t entry; | 86 | swp_entry_t entry; |
@@ -88,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
88 | pmd_t *pmd; | 89 | pmd_t *pmd; |
89 | pte_t *ptep, pte; | 90 | pte_t *ptep, pte; |
90 | spinlock_t *ptl; | 91 | spinlock_t *ptl; |
91 | unsigned long addr = page_address_in_vma(new, vma); | ||
92 | |||
93 | if (addr == -EFAULT) | ||
94 | return; | ||
95 | 92 | ||
96 | pgd = pgd_offset(mm, addr); | 93 | pgd = pgd_offset(mm, addr); |
97 | if (!pgd_present(*pgd)) | 94 | if (!pgd_present(*pgd)) |
98 | return; | 95 | goto out; |
99 | 96 | ||
100 | pud = pud_offset(pgd, addr); | 97 | pud = pud_offset(pgd, addr); |
101 | if (!pud_present(*pud)) | 98 | if (!pud_present(*pud)) |
102 | return; | 99 | goto out; |
103 | 100 | ||
104 | pmd = pmd_offset(pud, addr); | 101 | pmd = pmd_offset(pud, addr); |
105 | if (!pmd_present(*pmd)) | 102 | if (!pmd_present(*pmd)) |
106 | return; | 103 | goto out; |
107 | 104 | ||
108 | ptep = pte_offset_map(pmd, addr); | 105 | ptep = pte_offset_map(pmd, addr); |
109 | 106 | ||
110 | if (!is_swap_pte(*ptep)) { | 107 | if (!is_swap_pte(*ptep)) { |
111 | pte_unmap(ptep); | 108 | pte_unmap(ptep); |
112 | return; | 109 | goto out; |
113 | } | 110 | } |
114 | 111 | ||
115 | ptl = pte_lockptr(mm, pmd); | 112 | ptl = pte_lockptr(mm, pmd); |
116 | spin_lock(ptl); | 113 | spin_lock(ptl); |
117 | pte = *ptep; | 114 | pte = *ptep; |
118 | if (!is_swap_pte(pte)) | 115 | if (!is_swap_pte(pte)) |
119 | goto out; | 116 | goto unlock; |
120 | 117 | ||
121 | entry = pte_to_swp_entry(pte); | 118 | entry = pte_to_swp_entry(pte); |
122 | 119 | ||
123 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) | 120 | if (!is_migration_entry(entry) || |
124 | goto out; | 121 | migration_entry_to_page(entry) != old) |
122 | goto unlock; | ||
125 | 123 | ||
126 | get_page(new); | 124 | get_page(new); |
127 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 125 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
@@ -137,55 +135,10 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
137 | 135 | ||
138 | /* No need to invalidate - it was non-present before */ | 136 | /* No need to invalidate - it was non-present before */ |
139 | update_mmu_cache(vma, addr, pte); | 137 | update_mmu_cache(vma, addr, pte); |
140 | 138 | unlock: | |
141 | out: | ||
142 | pte_unmap_unlock(ptep, ptl); | 139 | pte_unmap_unlock(ptep, ptl); |
143 | } | 140 | out: |
144 | 141 | return SWAP_AGAIN; | |
145 | /* | ||
146 | * Note that remove_file_migration_ptes will only work on regular mappings, | ||
147 | * Nonlinear mappings do not use migration entries. | ||
148 | */ | ||
149 | static void remove_file_migration_ptes(struct page *old, struct page *new) | ||
150 | { | ||
151 | struct vm_area_struct *vma; | ||
152 | struct address_space *mapping = new->mapping; | ||
153 | struct prio_tree_iter iter; | ||
154 | pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
155 | |||
156 | if (!mapping) | ||
157 | return; | ||
158 | |||
159 | spin_lock(&mapping->i_mmap_lock); | ||
160 | |||
161 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) | ||
162 | remove_migration_pte(vma, old, new); | ||
163 | |||
164 | spin_unlock(&mapping->i_mmap_lock); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Must hold mmap_sem lock on at least one of the vmas containing | ||
169 | * the page so that the anon_vma cannot vanish. | ||
170 | */ | ||
171 | static void remove_anon_migration_ptes(struct page *old, struct page *new) | ||
172 | { | ||
173 | struct anon_vma *anon_vma; | ||
174 | struct vm_area_struct *vma; | ||
175 | |||
176 | /* | ||
177 | * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. | ||
178 | */ | ||
179 | anon_vma = page_anon_vma(new); | ||
180 | if (!anon_vma) | ||
181 | return; | ||
182 | |||
183 | spin_lock(&anon_vma->lock); | ||
184 | |||
185 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) | ||
186 | remove_migration_pte(vma, old, new); | ||
187 | |||
188 | spin_unlock(&anon_vma->lock); | ||
189 | } | 142 | } |
190 | 143 | ||
191 | /* | 144 | /* |
@@ -194,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new) | |||
194 | */ | 147 | */ |
195 | static void remove_migration_ptes(struct page *old, struct page *new) | 148 | static void remove_migration_ptes(struct page *old, struct page *new) |
196 | { | 149 | { |
197 | if (PageAnon(new)) | 150 | rmap_walk(new, remove_migration_pte, old); |
198 | remove_anon_migration_ptes(old, new); | ||
199 | else | ||
200 | remove_file_migration_ptes(old, new); | ||
201 | } | 151 | } |
202 | 152 | ||
203 | /* | 153 | /* |
@@ -358,6 +308,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
358 | } | 308 | } |
359 | 309 | ||
360 | mlock_migrate_page(newpage, page); | 310 | mlock_migrate_page(newpage, page); |
311 | ksm_migrate_page(newpage, page); | ||
361 | 312 | ||
362 | ClearPageSwapCache(page); | 313 | ClearPageSwapCache(page); |
363 | ClearPagePrivate(page); | 314 | ClearPagePrivate(page); |
@@ -577,9 +528,9 @@ static int move_to_new_page(struct page *newpage, struct page *page) | |||
577 | else | 528 | else |
578 | rc = fallback_migrate_page(mapping, newpage, page); | 529 | rc = fallback_migrate_page(mapping, newpage, page); |
579 | 530 | ||
580 | if (!rc) { | 531 | if (!rc) |
581 | remove_migration_ptes(page, newpage); | 532 | remove_migration_ptes(page, newpage); |
582 | } else | 533 | else |
583 | newpage->mapping = NULL; | 534 | newpage->mapping = NULL; |
584 | 535 | ||
585 | unlock_page(newpage); | 536 | unlock_page(newpage); |