author     Dave Jones <davej@redhat.com>    2006-06-29 16:01:54 -0400
committer  Dave Jones <davej@redhat.com>    2006-06-29 16:01:54 -0400
commit     55b4d6a52195a8f277ffddf755ddaff359878f41 (patch)
tree       06a3183a562f8da4688f65023f7a18dcad702956 /mm/migrate.c
parent     adf8a287150667feb5747f8beade62acacc17d4e (diff)
parent     1f1332f727c3229eb2166a83fec5d3de6a73dce2 (diff)
Merge ../linus
Conflicts:
drivers/char/agp/Kconfig
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c  1076
1 file changed, 705 insertions, 371 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c25040693d2..3f1e0c2c942c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/migrate.h> | 15 | #include <linux/migrate.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/swap.h> | 17 | #include <linux/swap.h> |
18 | #include <linux/swapops.h> | ||
18 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
19 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
20 | #include <linux/mm_inline.h> | 21 | #include <linux/mm_inline.h> |
@@ -23,13 +24,13 @@ | |||
23 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
24 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
25 | #include <linux/cpuset.h> | 26 | #include <linux/cpuset.h> |
26 | #include <linux/swapops.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/mempolicy.h> | ||
29 | #include <linux/vmalloc.h> | ||
30 | #include <linux/security.h> | ||
27 | 31 | ||
28 | #include "internal.h" | 32 | #include "internal.h" |
29 | 33 | ||
30 | /* The maximum number of pages to take off the LRU for migration */ | ||
31 | #define MIGRATE_CHUNK_SIZE 256 | ||
32 | |||
33 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 34 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
34 | 35 | ||
35 | /* | 36 | /* |
@@ -64,16 +65,11 @@ int isolate_lru_page(struct page *page, struct list_head *pagelist) | |||
64 | } | 65 | } |
65 | 66 | ||
66 | /* | 67 | /* |
67 | * migrate_prep() needs to be called after we have compiled the list of pages | 68 | * migrate_prep() needs to be called before we start compiling a list of pages |
68 | * to be migrated using isolate_lru_page() but before we begin a series of calls | 69 | * to be migrated using isolate_lru_page(). |
69 | * to migrate_pages(). | ||
70 | */ | 70 | */ |
71 | int migrate_prep(void) | 71 | int migrate_prep(void) |
72 | { | 72 | { |
73 | /* Must have swap device for migration */ | ||
74 | if (nr_swap_pages <= 0) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* | 73 | /* |
78 | * Clear the LRU lists so pages can be isolated. | 74 | * Clear the LRU lists so pages can be isolated. |
79 | * Note that pages may be moved off the LRU after we have | 75 | * Note that pages may be moved off the LRU after we have |
@@ -87,7 +83,6 @@ int migrate_prep(void) | |||
87 | 83 | ||
88 | static inline void move_to_lru(struct page *page) | 84 | static inline void move_to_lru(struct page *page) |
89 | { | 85 | { |
90 | list_del(&page->lru); | ||
91 | if (PageActive(page)) { | 86 | if (PageActive(page)) { |
92 | /* | 87 | /* |
93 | * lru_cache_add_active checks that | 88 | * lru_cache_add_active checks that |
@@ -113,113 +108,200 @@ int putback_lru_pages(struct list_head *l) | |||
113 | int count = 0; | 108 | int count = 0; |
114 | 109 | ||
115 | list_for_each_entry_safe(page, page2, l, lru) { | 110 | list_for_each_entry_safe(page, page2, l, lru) { |
111 | list_del(&page->lru); | ||
116 | move_to_lru(page); | 112 | move_to_lru(page); |
117 | count++; | 113 | count++; |
118 | } | 114 | } |
119 | return count; | 115 | return count; |
120 | } | 116 | } |
121 | 117 | ||
122 | /* | 118 | static inline int is_swap_pte(pte_t pte) |
123 | * Non migratable page | ||
124 | */ | ||
125 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
126 | { | 119 | { |
127 | return -EIO; | 120 | return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); |
128 | } | 121 | } |
129 | EXPORT_SYMBOL(fail_migrate_page); | ||
130 | 122 | ||
131 | /* | 123 | /* |
132 | * swapout a single page | 124 | * Restore a potential migration pte to a working pte entry |
133 | * page is locked upon entry, unlocked on exit | ||
134 | */ | 125 | */ |
135 | static int swap_page(struct page *page) | 126 | static void remove_migration_pte(struct vm_area_struct *vma, |
127 | struct page *old, struct page *new) | ||
136 | { | 128 | { |
137 | struct address_space *mapping = page_mapping(page); | 129 | struct mm_struct *mm = vma->vm_mm; |
130 | swp_entry_t entry; | ||
131 | pgd_t *pgd; | ||
132 | pud_t *pud; | ||
133 | pmd_t *pmd; | ||
134 | pte_t *ptep, pte; | ||
135 | spinlock_t *ptl; | ||
136 | unsigned long addr = page_address_in_vma(new, vma); | ||
137 | |||
138 | if (addr == -EFAULT) | ||
139 | return; | ||
140 | |||
141 | pgd = pgd_offset(mm, addr); | ||
142 | if (!pgd_present(*pgd)) | ||
143 | return; | ||
144 | |||
145 | pud = pud_offset(pgd, addr); | ||
146 | if (!pud_present(*pud)) | ||
147 | return; | ||
148 | |||
149 | pmd = pmd_offset(pud, addr); | ||
150 | if (!pmd_present(*pmd)) | ||
151 | return; | ||
152 | |||
153 | ptep = pte_offset_map(pmd, addr); | ||
154 | |||
155 | if (!is_swap_pte(*ptep)) { | ||
156 | pte_unmap(ptep); | ||
157 | return; | ||
158 | } | ||
138 | 159 | ||
139 | if (page_mapped(page) && mapping) | 160 | ptl = pte_lockptr(mm, pmd); |
140 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | 161 | spin_lock(ptl); |
141 | goto unlock_retry; | 162 | pte = *ptep; |
163 | if (!is_swap_pte(pte)) | ||
164 | goto out; | ||
142 | 165 | ||
143 | if (PageDirty(page)) { | 166 | entry = pte_to_swp_entry(pte); |
144 | /* Page is dirty, try to write it out here */ | ||
145 | switch(pageout(page, mapping)) { | ||
146 | case PAGE_KEEP: | ||
147 | case PAGE_ACTIVATE: | ||
148 | goto unlock_retry; | ||
149 | 167 | ||
150 | case PAGE_SUCCESS: | 168 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) |
151 | goto retry; | 169 | goto out; |
152 | 170 | ||
153 | case PAGE_CLEAN: | 171 | get_page(new); |
154 | ; /* try to free the page below */ | 172 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
155 | } | 173 | if (is_write_migration_entry(entry)) |
156 | } | 174 | pte = pte_mkwrite(pte); |
175 | set_pte_at(mm, addr, ptep, pte); | ||
157 | 176 | ||
158 | if (PagePrivate(page)) { | 177 | if (PageAnon(new)) |
159 | if (!try_to_release_page(page, GFP_KERNEL) || | 178 | page_add_anon_rmap(new, vma, addr); |
160 | (!mapping && page_count(page) == 1)) | 179 | else |
161 | goto unlock_retry; | 180 | page_add_file_rmap(new); |
162 | } | ||
163 | 181 | ||
164 | if (remove_mapping(mapping, page)) { | 182 | /* No need to invalidate - it was non-present before */ |
165 | /* Success */ | 183 | update_mmu_cache(vma, addr, pte); |
166 | unlock_page(page); | 184 | lazy_mmu_prot_update(pte); |
167 | return 0; | ||
168 | } | ||
169 | 185 | ||
170 | unlock_retry: | 186 | out: |
171 | unlock_page(page); | 187 | pte_unmap_unlock(ptep, ptl); |
188 | } | ||
172 | 189 | ||
173 | retry: | 190 | /* |
174 | return -EAGAIN; | 191 | * Note that remove_file_migration_ptes will only work on regular mappings, |
192 | * Nonlinear mappings do not use migration entries. | ||
193 | */ | ||
194 | static void remove_file_migration_ptes(struct page *old, struct page *new) | ||
195 | { | ||
196 | struct vm_area_struct *vma; | ||
197 | struct address_space *mapping = page_mapping(new); | ||
198 | struct prio_tree_iter iter; | ||
199 | pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
200 | |||
201 | if (!mapping) | ||
202 | return; | ||
203 | |||
204 | spin_lock(&mapping->i_mmap_lock); | ||
205 | |||
206 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) | ||
207 | remove_migration_pte(vma, old, new); | ||
208 | |||
209 | spin_unlock(&mapping->i_mmap_lock); | ||
175 | } | 210 | } |
176 | 211 | ||
177 | /* | 212 | /* |
178 | * Remove references for a page and establish the new page with the correct | 213 | * Must hold mmap_sem lock on at least one of the vmas containing |
179 | * basic settings to be able to stop accesses to the page. | 214 | * the page so that the anon_vma cannot vanish. |
180 | */ | 215 | */ |
181 | int migrate_page_remove_references(struct page *newpage, | 216 | static void remove_anon_migration_ptes(struct page *old, struct page *new) |
182 | struct page *page, int nr_refs) | ||
183 | { | 217 | { |
184 | struct address_space *mapping = page_mapping(page); | 218 | struct anon_vma *anon_vma; |
185 | struct page **radix_pointer; | 219 | struct vm_area_struct *vma; |
220 | unsigned long mapping; | ||
186 | 221 | ||
187 | /* | 222 | mapping = (unsigned long)new->mapping; |
188 | * Avoid doing any of the following work if the page count | ||
189 | * indicates that the page is in use or truncate has removed | ||
190 | * the page. | ||
191 | */ | ||
192 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
193 | return -EAGAIN; | ||
194 | 223 | ||
195 | /* | 224 | if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0) |
196 | * Establish swap ptes for anonymous pages or destroy pte | 225 | return; |
197 | * maps for files. | ||
198 | * | ||
199 | * In order to reestablish file backed mappings the fault handlers | ||
200 | * will take the radix tree_lock which may then be used to stop | ||
201 | * processses from accessing this page until the new page is ready. | ||
202 | * | ||
203 | * A process accessing via a swap pte (an anonymous page) will take a | ||
204 | * page_lock on the old page which will block the process until the | ||
205 | * migration attempt is complete. At that time the PageSwapCache bit | ||
206 | * will be examined. If the page was migrated then the PageSwapCache | ||
207 | * bit will be clear and the operation to retrieve the page will be | ||
208 | * retried which will find the new page in the radix tree. Then a new | ||
209 | * direct mapping may be generated based on the radix tree contents. | ||
210 | * | ||
211 | * If the page was not migrated then the PageSwapCache bit | ||
212 | * is still set and the operation may continue. | ||
213 | */ | ||
214 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
215 | /* A vma has VM_LOCKED set -> permanent failure */ | ||
216 | return -EPERM; | ||
217 | 226 | ||
218 | /* | 227 | /* |
219 | * Give up if we were unable to remove all mappings. | 228 | * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. |
220 | */ | 229 | */ |
221 | if (page_mapcount(page)) | 230 | anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON); |
222 | return -EAGAIN; | 231 | spin_lock(&anon_vma->lock); |
232 | |||
233 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) | ||
234 | remove_migration_pte(vma, old, new); | ||
235 | |||
236 | spin_unlock(&anon_vma->lock); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Get rid of all migration entries and replace them by | ||
241 | * references to the indicated page. | ||
242 | */ | ||
243 | static void remove_migration_ptes(struct page *old, struct page *new) | ||
244 | { | ||
245 | if (PageAnon(new)) | ||
246 | remove_anon_migration_ptes(old, new); | ||
247 | else | ||
248 | remove_file_migration_ptes(old, new); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Something used the pte of a page under migration. We need to | ||
253 | * get to the page and wait until migration is finished. | ||
254 | * When we return from this function the fault will be retried. | ||
255 | * | ||
256 | * This function is called from do_swap_page(). | ||
257 | */ | ||
258 | void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | ||
259 | unsigned long address) | ||
260 | { | ||
261 | pte_t *ptep, pte; | ||
262 | spinlock_t *ptl; | ||
263 | swp_entry_t entry; | ||
264 | struct page *page; | ||
265 | |||
266 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
267 | pte = *ptep; | ||
268 | if (!is_swap_pte(pte)) | ||
269 | goto out; | ||
270 | |||
271 | entry = pte_to_swp_entry(pte); | ||
272 | if (!is_migration_entry(entry)) | ||
273 | goto out; | ||
274 | |||
275 | page = migration_entry_to_page(entry); | ||
276 | |||
277 | get_page(page); | ||
278 | pte_unmap_unlock(ptep, ptl); | ||
279 | wait_on_page_locked(page); | ||
280 | put_page(page); | ||
281 | return; | ||
282 | out: | ||
283 | pte_unmap_unlock(ptep, ptl); | ||
284 | } | ||
285 | |||
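The new migration_entry_wait() is consumed from the fault path: when do_swap_page() finds a swap pte that actually encodes a migration entry, it waits for the migration to finish and then retries the fault. A minimal sketch of that caller side (the helper name and exact surrounding code are illustrative, not part of this patch):

	/*
	 * Sketch (assumed context, not part of this diff): how the fault
	 * path consumes a migration entry.  Names follow the usual
	 * do_swap_page() conventions.
	 */
	static void wait_if_migration_entry(struct mm_struct *mm, pmd_t *pmd,
					    unsigned long address, pte_t orig_pte)
	{
		swp_entry_t entry = pte_to_swp_entry(orig_pte);

		if (is_migration_entry(entry))
			/* Sleep until the migration finishes, then let the
			 * fault be retried against the new pte. */
			migration_entry_wait(mm, pmd, address);
	}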
286 | /* | ||
287 | * Replace the page in the mapping. | ||
288 | * | ||
289 | * The number of remaining references must be: | ||
290 | * 1 for anonymous pages without a mapping | ||
291 | * 2 for pages with a mapping | ||
292 | * 3 for pages with a mapping and PagePrivate set. | ||
293 | */ | ||
294 | static int migrate_page_move_mapping(struct address_space *mapping, | ||
295 | struct page *newpage, struct page *page) | ||
296 | { | ||
297 | struct page **radix_pointer; | ||
298 | |||
299 | if (!mapping) { | ||
300 | /* Anonymous page */ | ||
301 | if (page_count(page) != 1) | ||
302 | return -EAGAIN; | ||
303 | return 0; | ||
304 | } | ||
223 | 305 | ||
224 | write_lock_irq(&mapping->tree_lock); | 306 | write_lock_irq(&mapping->tree_lock); |
225 | 307 | ||
@@ -227,7 +309,7 @@ int migrate_page_remove_references(struct page *newpage, | |||
227 | &mapping->page_tree, | 309 | &mapping->page_tree, |
228 | page_index(page)); | 310 | page_index(page)); |
229 | 311 | ||
230 | if (!page_mapping(page) || page_count(page) != nr_refs || | 312 | if (page_count(page) != 2 + !!PagePrivate(page) || |
231 | *radix_pointer != page) { | 313 | *radix_pointer != page) { |
232 | write_unlock_irq(&mapping->tree_lock); | 314 | write_unlock_irq(&mapping->tree_lock); |
233 | return -EAGAIN; | 315 | return -EAGAIN; |
@@ -235,19 +317,14 @@ int migrate_page_remove_references(struct page *newpage, | |||
235 | 317 | ||
236 | /* | 318 | /* |
237 | * Now we know that no one else is looking at the page. | 319 | * Now we know that no one else is looking at the page. |
238 | * | ||
239 | * Certain minimal information about a page must be available | ||
240 | * in order for other subsystems to properly handle the page if they | ||
241 | * find it through the radix tree update before we are finished | ||
242 | * copying the page. | ||
243 | */ | 320 | */ |
244 | get_page(newpage); | 321 | get_page(newpage); |
245 | newpage->index = page->index; | 322 | #ifdef CONFIG_SWAP |
246 | newpage->mapping = page->mapping; | ||
247 | if (PageSwapCache(page)) { | 323 | if (PageSwapCache(page)) { |
248 | SetPageSwapCache(newpage); | 324 | SetPageSwapCache(newpage); |
249 | set_page_private(newpage, page_private(page)); | 325 | set_page_private(newpage, page_private(page)); |
250 | } | 326 | } |
327 | #endif | ||
251 | 328 | ||
252 | *radix_pointer = newpage; | 329 | *radix_pointer = newpage; |
253 | __put_page(page); | 330 | __put_page(page); |
@@ -255,12 +332,11 @@ int migrate_page_remove_references(struct page *newpage, | |||
255 | 332 | ||
256 | return 0; | 333 | return 0; |
257 | } | 334 | } |
258 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
259 | 335 | ||
260 | /* | 336 | /* |
261 | * Copy the page to its new location | 337 | * Copy the page to its new location |
262 | */ | 338 | */ |
263 | void migrate_page_copy(struct page *newpage, struct page *page) | 339 | static void migrate_page_copy(struct page *newpage, struct page *page) |
264 | { | 340 | { |
265 | copy_highpage(newpage, page); | 341 | copy_highpage(newpage, page); |
266 | 342 | ||
@@ -282,7 +358,9 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
282 | set_page_dirty(newpage); | 358 | set_page_dirty(newpage); |
283 | } | 359 | } |
284 | 360 | ||
361 | #ifdef CONFIG_SWAP | ||
285 | ClearPageSwapCache(page); | 362 | ClearPageSwapCache(page); |
363 | #endif | ||
286 | ClearPageActive(page); | 364 | ClearPageActive(page); |
287 | ClearPagePrivate(page); | 365 | ClearPagePrivate(page); |
288 | set_page_private(page, 0); | 366 | set_page_private(page, 0); |
@@ -295,7 +373,18 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
295 | if (PageWriteback(newpage)) | 373 | if (PageWriteback(newpage)) |
296 | end_page_writeback(newpage); | 374 | end_page_writeback(newpage); |
297 | } | 375 | } |
298 | EXPORT_SYMBOL(migrate_page_copy); | 376 | |
377 | /************************************************************ | ||
378 | * Migration functions | ||
379 | ***********************************************************/ | ||
380 | |||
381 | /* Always fail migration. Used for mappings that are not movable */ | ||
382 | int fail_migrate_page(struct address_space *mapping, | ||
383 | struct page *newpage, struct page *page) | ||
384 | { | ||
385 | return -EIO; | ||
386 | } | ||
387 | EXPORT_SYMBOL(fail_migrate_page); | ||
299 | 388 | ||
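fail_migrate_page() exists so that mappings whose pages must never move can opt out of migration: wiring it into the mapping's address_space_operations makes every attempt return -EIO, so the page is treated as a permanent failure. A hedged sketch of such wiring; the filesystem and its other methods are hypothetical:

	/* Hypothetical aops for a mapping that refuses migration outright. */
	static struct address_space_operations examplefs_aops = {
		.readpage	= examplefs_readpage,	/* hypothetical */
		.writepage	= examplefs_writepage,	/* hypothetical */
		.migratepage	= fail_migrate_page,	/* always fails with -EIO */
	};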
300 | /* | 389 | /* |
301 | * Common logic to directly migrate a single page suitable for | 390 | * Common logic to directly migrate a single page suitable for |
@@ -303,51 +392,284 @@ EXPORT_SYMBOL(migrate_page_copy); | |||
303 | * | 392 | * |
304 | * Pages are locked upon entry and exit. | 393 | * Pages are locked upon entry and exit. |
305 | */ | 394 | */ |
306 | int migrate_page(struct page *newpage, struct page *page) | 395 | int migrate_page(struct address_space *mapping, |
396 | struct page *newpage, struct page *page) | ||
307 | { | 397 | { |
308 | int rc; | 398 | int rc; |
309 | 399 | ||
310 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | 400 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ |
311 | 401 | ||
312 | rc = migrate_page_remove_references(newpage, page, 2); | 402 | rc = migrate_page_move_mapping(mapping, newpage, page); |
403 | |||
404 | if (rc) | ||
405 | return rc; | ||
406 | |||
407 | migrate_page_copy(newpage, page); | ||
408 | return 0; | ||
409 | } | ||
410 | EXPORT_SYMBOL(migrate_page); | ||
411 | |||
412 | /* | ||
413 | * Migration function for pages with buffers. This function can only be used | ||
414 | * if the underlying filesystem guarantees that no other references to "page" | ||
415 | * exist. | ||
416 | */ | ||
417 | int buffer_migrate_page(struct address_space *mapping, | ||
418 | struct page *newpage, struct page *page) | ||
419 | { | ||
420 | struct buffer_head *bh, *head; | ||
421 | int rc; | ||
422 | |||
423 | if (!page_has_buffers(page)) | ||
424 | return migrate_page(mapping, newpage, page); | ||
425 | |||
426 | head = page_buffers(page); | ||
427 | |||
428 | rc = migrate_page_move_mapping(mapping, newpage, page); | ||
313 | 429 | ||
314 | if (rc) | 430 | if (rc) |
315 | return rc; | 431 | return rc; |
316 | 432 | ||
433 | bh = head; | ||
434 | do { | ||
435 | get_bh(bh); | ||
436 | lock_buffer(bh); | ||
437 | bh = bh->b_this_page; | ||
438 | |||
439 | } while (bh != head); | ||
440 | |||
441 | ClearPagePrivate(page); | ||
442 | set_page_private(newpage, page_private(page)); | ||
443 | set_page_private(page, 0); | ||
444 | put_page(page); | ||
445 | get_page(newpage); | ||
446 | |||
447 | bh = head; | ||
448 | do { | ||
449 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
450 | bh = bh->b_this_page; | ||
451 | |||
452 | } while (bh != head); | ||
453 | |||
454 | SetPagePrivate(newpage); | ||
455 | |||
317 | migrate_page_copy(newpage, page); | 456 | migrate_page_copy(newpage, page); |
318 | 457 | ||
458 | bh = head; | ||
459 | do { | ||
460 | unlock_buffer(bh); | ||
461 | put_bh(bh); | ||
462 | bh = bh->b_this_page; | ||
463 | |||
464 | } while (bh != head); | ||
465 | |||
466 | return 0; | ||
467 | } | ||
468 | EXPORT_SYMBOL(buffer_migrate_page); | ||
469 | |||
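buffer_migrate_page() is the ready-made ->migratepage implementation for simple block-backed filesystems: it moves the buffer_heads along with the page data, and is only safe when the filesystem guarantees no other references to the page exist. Hooking it up is a one-line entry in the aops (the filesystem shown here is hypothetical; ext2/ext3-style filesystems use the helper the same way):

	/* Hypothetical aops for a filesystem whose page cache pages carry buffers. */
	static struct address_space_operations examplefs_aops = {
		.readpage	= examplefs_readpage,	/* hypothetical */
		.writepage	= examplefs_writepage,	/* hypothetical */
		.migratepage	= buffer_migrate_page,
	};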
470 | /* | ||
471 | * Writeback a page to clean the dirty state | ||
472 | */ | ||
473 | static int writeout(struct address_space *mapping, struct page *page) | ||
474 | { | ||
475 | struct writeback_control wbc = { | ||
476 | .sync_mode = WB_SYNC_NONE, | ||
477 | .nr_to_write = 1, | ||
478 | .range_start = 0, | ||
479 | .range_end = LLONG_MAX, | ||
480 | .nonblocking = 1, | ||
481 | .for_reclaim = 1 | ||
482 | }; | ||
483 | int rc; | ||
484 | |||
485 | if (!mapping->a_ops->writepage) | ||
486 | /* No write method for the address space */ | ||
487 | return -EINVAL; | ||
488 | |||
489 | if (!clear_page_dirty_for_io(page)) | ||
490 | /* Someone else already triggered a write */ | ||
491 | return -EAGAIN; | ||
492 | |||
319 | /* | 493 | /* |
320 | * Remove auxiliary swap entries and replace | 494 | * A dirty page may imply that the underlying filesystem has |
321 | * them with real ptes. | 495 | * the page on some queue. So the page must be clean for |
322 | * | 496 | * migration. Writeout may mean we loose the lock and the |
323 | * | 497 | * migration. Writeout may mean we lose the lock and the |
324 | * waiting on the page lock to use the new page via the page tables | 498 | * At this point we know that the migration attempt cannot |
325 | * before the new page is unlocked. | 499 | * be successful. |
326 | */ | 500 | */ |
327 | remove_from_swap(newpage); | 501 | remove_migration_ptes(page, page); |
328 | return 0; | 502 | |
503 | rc = mapping->a_ops->writepage(page, &wbc); | ||
504 | if (rc < 0) | ||
505 | /* I/O Error writing */ | ||
506 | return -EIO; | ||
507 | |||
508 | if (rc != AOP_WRITEPAGE_ACTIVATE) | ||
509 | /* unlocked. Relock */ | ||
510 | lock_page(page); | ||
511 | |||
512 | return -EAGAIN; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Default handling if a filesystem does not provide a migration function. | ||
517 | */ | ||
518 | static int fallback_migrate_page(struct address_space *mapping, | ||
519 | struct page *newpage, struct page *page) | ||
520 | { | ||
521 | if (PageDirty(page)) | ||
522 | return writeout(mapping, page); | ||
523 | |||
524 | /* | ||
525 | * Buffers may be managed in a filesystem specific way. | ||
526 | * We must have no buffers or drop them. | ||
527 | */ | ||
528 | if (page_has_buffers(page) && | ||
529 | !try_to_release_page(page, GFP_KERNEL)) | ||
530 | return -EAGAIN; | ||
531 | |||
532 | return migrate_page(mapping, newpage, page); | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Move a page to a newly allocated page | ||
537 | * The page is locked and all ptes have been successfully removed. | ||
538 | * | ||
539 | * The new page will have replaced the old page if this function | ||
540 | * is successful. | ||
541 | */ | ||
542 | static int move_to_new_page(struct page *newpage, struct page *page) | ||
543 | { | ||
544 | struct address_space *mapping; | ||
545 | int rc; | ||
546 | |||
547 | /* | ||
548 | * Block others from accessing the page when we get around to | ||
549 | * establishing additional references. We are the only one | ||
550 | * holding a reference to the new page at this point. | ||
551 | */ | ||
552 | if (TestSetPageLocked(newpage)) | ||
553 | BUG(); | ||
554 | |||
555 | /* Prepare mapping for the new page.*/ | ||
556 | newpage->index = page->index; | ||
557 | newpage->mapping = page->mapping; | ||
558 | |||
559 | mapping = page_mapping(page); | ||
560 | if (!mapping) | ||
561 | rc = migrate_page(mapping, newpage, page); | ||
562 | else if (mapping->a_ops->migratepage) | ||
563 | /* | ||
564 | * Most pages have a mapping and most filesystems | ||
565 | * should provide a migration function. Anonymous | ||
566 | * pages are part of swap space which also has its | ||
567 | * own migration function. This is the most common | ||
568 | * path for page migration. | ||
569 | */ | ||
570 | rc = mapping->a_ops->migratepage(mapping, | ||
571 | newpage, page); | ||
572 | else | ||
573 | rc = fallback_migrate_page(mapping, newpage, page); | ||
574 | |||
575 | if (!rc) | ||
576 | remove_migration_ptes(page, newpage); | ||
577 | else | ||
578 | newpage->mapping = NULL; | ||
579 | |||
580 | unlock_page(newpage); | ||
581 | |||
582 | return rc; | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * Obtain the lock on page, remove all ptes and migrate the page | ||
587 | * to the newly allocated page in newpage. | ||
588 | */ | ||
589 | static int unmap_and_move(new_page_t get_new_page, unsigned long private, | ||
590 | struct page *page, int force) | ||
591 | { | ||
592 | int rc = 0; | ||
593 | int *result = NULL; | ||
594 | struct page *newpage = get_new_page(page, private, &result); | ||
595 | |||
596 | if (!newpage) | ||
597 | return -ENOMEM; | ||
598 | |||
599 | if (page_count(page) == 1) | ||
600 | /* page was freed from under us. So we are done. */ | ||
601 | goto move_newpage; | ||
602 | |||
603 | rc = -EAGAIN; | ||
604 | if (TestSetPageLocked(page)) { | ||
605 | if (!force) | ||
606 | goto move_newpage; | ||
607 | lock_page(page); | ||
608 | } | ||
609 | |||
610 | if (PageWriteback(page)) { | ||
611 | if (!force) | ||
612 | goto unlock; | ||
613 | wait_on_page_writeback(page); | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * Establish migration ptes or remove ptes | ||
618 | */ | ||
619 | try_to_unmap(page, 1); | ||
620 | if (!page_mapped(page)) | ||
621 | rc = move_to_new_page(newpage, page); | ||
622 | |||
623 | if (rc) | ||
624 | remove_migration_ptes(page, page); | ||
625 | |||
626 | unlock: | ||
627 | unlock_page(page); | ||
628 | |||
629 | if (rc != -EAGAIN) { | ||
630 | /* | ||
631 | * A page that has been migrated has all references | ||
632 | * removed and will be freed. A page that has not been | ||
633 | * migrated will have kepts its references and be | ||
634 | * restored. | ||
635 | */ | ||
636 | list_del(&page->lru); | ||
637 | move_to_lru(page); | ||
638 | } | ||
639 | |||
640 | move_newpage: | ||
641 | /* | ||
642 | * Move the new page to the LRU. If migration was not successful | ||
643 | * then this will free the page. | ||
644 | */ | ||
645 | move_to_lru(newpage); | ||
646 | if (result) { | ||
647 | if (rc) | ||
648 | *result = rc; | ||
649 | else | ||
650 | *result = page_to_nid(newpage); | ||
651 | } | ||
652 | return rc; | ||
329 | } | 653 | } |
330 | EXPORT_SYMBOL(migrate_page); | ||
331 | 654 | ||
332 | /* | 655 | /* |
333 | * migrate_pages | 656 | * migrate_pages |
334 | * | 657 | * |
335 | * Two lists are passed to this function. The first list | 658 | * The function takes one list of pages to migrate and a function |
336 | * contains the pages isolated from the LRU to be migrated. | 659 | * that determines from the page to be migrated and the private data |
337 | * The second list contains new pages that the pages isolated | 660 | * the target of the move and allocates the page. |
338 | * can be moved to. If the second list is NULL then all | ||
339 | * pages are swapped out. | ||
340 | * | 661 | * |
341 | * The function returns after 10 attempts or if no pages | 662 | * The function returns after 10 attempts or if no pages |
342 | * are movable anymore because to has become empty | 663 | * are movable anymore because to has become empty |
343 | * or no retryable pages exist anymore. | 664 | * or no retryable pages exist anymore. All pages will be |
665 | * returned to the LRU or freed. | ||
344 | * | 666 | * |
345 | * Return: Number of pages not migrated when "to" ran empty. | 667 | * Return: Number of pages not migrated or error code. |
346 | */ | 668 | */ |
347 | int migrate_pages(struct list_head *from, struct list_head *to, | 669 | int migrate_pages(struct list_head *from, |
348 | struct list_head *moved, struct list_head *failed) | 670 | new_page_t get_new_page, unsigned long private) |
349 | { | 671 | { |
350 | int retry; | 672 | int retry = 1; |
351 | int nr_failed = 0; | 673 | int nr_failed = 0; |
352 | int pass = 0; | 674 | int pass = 0; |
353 | struct page *page; | 675 | struct page *page; |
@@ -358,305 +680,317 @@ int migrate_pages(struct list_head *from, struct list_head *to, | |||
358 | if (!swapwrite) | 680 | if (!swapwrite) |
359 | current->flags |= PF_SWAPWRITE; | 681 | current->flags |= PF_SWAPWRITE; |
360 | 682 | ||
361 | redo: | 683 | for(pass = 0; pass < 10 && retry; pass++) { |
362 | retry = 0; | 684 | retry = 0; |
685 | |||
686 | list_for_each_entry_safe(page, page2, from, lru) { | ||
687 | cond_resched(); | ||
688 | |||
689 | rc = unmap_and_move(get_new_page, private, | ||
690 | page, pass > 2); | ||
691 | |||
692 | switch(rc) { | ||
693 | case -ENOMEM: | ||
694 | goto out; | ||
695 | case -EAGAIN: | ||
696 | retry++; | ||
697 | break; | ||
698 | case 0: | ||
699 | break; | ||
700 | default: | ||
701 | /* Permanent failure */ | ||
702 | nr_failed++; | ||
703 | break; | ||
704 | } | ||
705 | } | ||
706 | } | ||
707 | rc = 0; | ||
708 | out: | ||
709 | if (!swapwrite) | ||
710 | current->flags &= ~PF_SWAPWRITE; | ||
711 | |||
712 | putback_lru_pages(from); | ||
713 | |||
714 | if (rc) | ||
715 | return rc; | ||
363 | 716 | ||
364 | list_for_each_entry_safe(page, page2, from, lru) { | 717 | return nr_failed + retry; |
365 | struct page *newpage = NULL; | 718 | } |
366 | struct address_space *mapping; | ||
367 | 719 | ||
368 | cond_resched(); | 720 | #ifdef CONFIG_NUMA |
721 | /* | ||
722 | * Move a list of individual pages | ||
723 | */ | ||
724 | struct page_to_node { | ||
725 | unsigned long addr; | ||
726 | struct page *page; | ||
727 | int node; | ||
728 | int status; | ||
729 | }; | ||
369 | 730 | ||
370 | rc = 0; | 731 | static struct page *new_page_node(struct page *p, unsigned long private, |
371 | if (page_count(page) == 1) | 732 | int **result) |
372 | /* page was freed from under us. So we are done. */ | 733 | { |
373 | goto next; | 734 | struct page_to_node *pm = (struct page_to_node *)private; |
374 | 735 | ||
375 | if (to && list_empty(to)) | 736 | while (pm->node != MAX_NUMNODES && pm->page != p) |
376 | break; | 737 | pm++; |
377 | 738 | ||
378 | /* | 739 | if (pm->node == MAX_NUMNODES) |
379 | * Skip locked pages during the first two passes to give the | 740 | return NULL; |
380 | * functions holding the lock time to release the page. Later we | ||
381 | * use lock_page() to have a higher chance of acquiring the | ||
382 | * lock. | ||
383 | */ | ||
384 | rc = -EAGAIN; | ||
385 | if (pass > 2) | ||
386 | lock_page(page); | ||
387 | else | ||
388 | if (TestSetPageLocked(page)) | ||
389 | goto next; | ||
390 | 741 | ||
391 | /* | 742 | *result = &pm->status; |
392 | * Only wait on writeback if we have already done a pass where | ||
393 | * we we may have triggered writeouts for lots of pages. | ||
394 | */ | ||
395 | if (pass > 0) { | ||
396 | wait_on_page_writeback(page); | ||
397 | } else { | ||
398 | if (PageWriteback(page)) | ||
399 | goto unlock_page; | ||
400 | } | ||
401 | 743 | ||
402 | /* | 744 | return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); |
403 | * Anonymous pages must have swap cache references otherwise | 745 | } |
404 | * the information contained in the page maps cannot be | ||
405 | * preserved. | ||
406 | */ | ||
407 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
408 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
409 | rc = -ENOMEM; | ||
410 | goto unlock_page; | ||
411 | } | ||
412 | } | ||
413 | 746 | ||
414 | if (!to) { | 747 | /* |
415 | rc = swap_page(page); | 748 | * Move a set of pages as indicated in the pm array. The addr |
416 | goto next; | 749 | * field must be set to the virtual address of the page to be moved |
417 | } | 750 | * and the node number must contain a valid target node. |
751 | */ | ||
752 | static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | ||
753 | int migrate_all) | ||
754 | { | ||
755 | int err; | ||
756 | struct page_to_node *pp; | ||
757 | LIST_HEAD(pagelist); | ||
758 | |||
759 | down_read(&mm->mmap_sem); | ||
418 | 760 | ||
419 | newpage = lru_to_page(to); | 761 | /* |
420 | lock_page(newpage); | 762 | * Build a list of pages to migrate |
763 | */ | ||
764 | migrate_prep(); | ||
765 | for (pp = pm; pp->node != MAX_NUMNODES; pp++) { | ||
766 | struct vm_area_struct *vma; | ||
767 | struct page *page; | ||
421 | 768 | ||
422 | /* | 769 | /* |
423 | * Pages are properly locked and writeback is complete. | 770 | * A valid page pointer that will not match any of the |
424 | * Try to migrate the page. | 771 | * pages that will be moved. |
425 | */ | 772 | */ |
426 | mapping = page_mapping(page); | 773 | pp->page = ZERO_PAGE(0); |
427 | if (!mapping) | ||
428 | goto unlock_both; | ||
429 | 774 | ||
430 | if (mapping->a_ops->migratepage) { | 775 | err = -EFAULT; |
431 | /* | 776 | vma = find_vma(mm, pp->addr); |
432 | * Most pages have a mapping and most filesystems | 777 | if (!vma) |
433 | * should provide a migration function. Anonymous | 778 | goto set_status; |
434 | * pages are part of swap space which also has its | ||
435 | * own migration function. This is the most common | ||
436 | * path for page migration. | ||
437 | */ | ||
438 | rc = mapping->a_ops->migratepage(newpage, page); | ||
439 | goto unlock_both; | ||
440 | } | ||
441 | |||
442 | /* Make sure the dirty bit is up to date */ | ||
443 | if (try_to_unmap(page, 1) == SWAP_FAIL) { | ||
444 | rc = -EPERM; | ||
445 | goto unlock_both; | ||
446 | } | ||
447 | 779 | ||
448 | if (page_mapcount(page)) { | 780 | page = follow_page(vma, pp->addr, FOLL_GET); |
449 | rc = -EAGAIN; | 781 | err = -ENOENT; |
450 | goto unlock_both; | 782 | if (!page) |
451 | } | 783 | goto set_status; |
452 | 784 | ||
453 | /* | 785 | if (PageReserved(page)) /* Check for zero page */ |
454 | * Default handling if a filesystem does not provide | 786 | goto put_and_set; |
455 | * a migration function. We can only migrate clean | ||
456 | * pages so try to write out any dirty pages first. | ||
457 | */ | ||
458 | if (PageDirty(page)) { | ||
459 | switch (pageout(page, mapping)) { | ||
460 | case PAGE_KEEP: | ||
461 | case PAGE_ACTIVATE: | ||
462 | goto unlock_both; | ||
463 | |||
464 | case PAGE_SUCCESS: | ||
465 | unlock_page(newpage); | ||
466 | goto next; | ||
467 | |||
468 | case PAGE_CLEAN: | ||
469 | ; /* try to migrate the page below */ | ||
470 | } | ||
471 | } | ||
472 | 787 | ||
473 | /* | 788 | pp->page = page; |
474 | * Buffers are managed in a filesystem specific way. | 789 | err = page_to_nid(page); |
475 | * We must have no buffers or drop them. | ||
476 | */ | ||
477 | if (!page_has_buffers(page) || | ||
478 | try_to_release_page(page, GFP_KERNEL)) { | ||
479 | rc = migrate_page(newpage, page); | ||
480 | goto unlock_both; | ||
481 | } | ||
482 | 790 | ||
483 | /* | 791 | if (err == pp->node) |
484 | * On early passes with mapped pages simply | ||
485 | * retry. There may be a lock held for some | ||
486 | * buffers that may go away. Later | ||
487 | * swap them out. | ||
488 | */ | ||
489 | if (pass > 4) { | ||
490 | /* | 792 | /* |
491 | * Persistently unable to drop buffers..... As a | 793 | * Node already in the right place |
492 | * measure of last resort we fall back to | ||
493 | * swap_page(). | ||
494 | */ | 794 | */ |
495 | unlock_page(newpage); | 795 | goto put_and_set; |
496 | newpage = NULL; | ||
497 | rc = swap_page(page); | ||
498 | goto next; | ||
499 | } | ||
500 | 796 | ||
501 | unlock_both: | 797 | err = -EACCES; |
502 | unlock_page(newpage); | 798 | if (page_mapcount(page) > 1 && |
503 | 799 | !migrate_all) | |
504 | unlock_page: | 800 | goto put_and_set; |
505 | unlock_page(page); | 801 | |
506 | 802 | err = isolate_lru_page(page, &pagelist); | |
507 | next: | 803 | put_and_set: |
508 | if (rc == -EAGAIN) { | 804 | /* |
509 | retry++; | 805 | * Either remove the duplicate refcount from |
510 | } else if (rc) { | 806 | * isolate_lru_page() or drop the page ref if it was |
511 | /* Permanent failure */ | 807 | * not isolated. |
512 | list_move(&page->lru, failed); | 808 | */ |
513 | nr_failed++; | 809 | put_page(page); |
514 | } else { | 810 | set_status: |
515 | if (newpage) { | 811 | pp->status = err; |
516 | /* Successful migration. Return page to LRU */ | ||
517 | move_to_lru(newpage); | ||
518 | } | ||
519 | list_move(&page->lru, moved); | ||
520 | } | ||
521 | } | 812 | } |
522 | if (retry && pass++ < 10) | ||
523 | goto redo; | ||
524 | 813 | ||
525 | if (!swapwrite) | 814 | if (!list_empty(&pagelist)) |
526 | current->flags &= ~PF_SWAPWRITE; | 815 | err = migrate_pages(&pagelist, new_page_node, |
816 | (unsigned long)pm); | ||
817 | else | ||
818 | err = -ENOENT; | ||
527 | 819 | ||
528 | return nr_failed + retry; | 820 | up_read(&mm->mmap_sem); |
821 | return err; | ||
529 | } | 822 | } |
530 | 823 | ||
531 | /* | 824 | /* |
532 | * Migration function for pages with buffers. This function can only be used | 825 | * Determine the nodes of a list of pages. The addr in the pm array |
533 | * if the underlying filesystem guarantees that no other references to "page" | 826 | * must have been set to the virtual address of which we want to determine |
534 | * exist. | 827 | * the node number. |
535 | */ | 828 | */ |
536 | int buffer_migrate_page(struct page *newpage, struct page *page) | 829 | static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) |
537 | { | 830 | { |
538 | struct address_space *mapping = page->mapping; | 831 | down_read(&mm->mmap_sem); |
539 | struct buffer_head *bh, *head; | 832 | |
540 | int rc; | 833 | for ( ; pm->node != MAX_NUMNODES; pm++) { |
834 | struct vm_area_struct *vma; | ||
835 | struct page *page; | ||
836 | int err; | ||
837 | |||
838 | err = -EFAULT; | ||
839 | vma = find_vma(mm, pm->addr); | ||
840 | if (!vma) | ||
841 | goto set_status; | ||
842 | |||
843 | page = follow_page(vma, pm->addr, 0); | ||
844 | err = -ENOENT; | ||
845 | /* Use PageReserved to check for zero page */ | ||
846 | if (!page || PageReserved(page)) | ||
847 | goto set_status; | ||
848 | |||
849 | err = page_to_nid(page); | ||
850 | set_status: | ||
851 | pm->status = err; | ||
852 | } | ||
541 | 853 | ||
542 | if (!mapping) | 854 | up_read(&mm->mmap_sem); |
543 | return -EAGAIN; | 855 | return 0; |
856 | } | ||
544 | 857 | ||
545 | if (!page_has_buffers(page)) | 858 | /* |
546 | return migrate_page(newpage, page); | 859 | * Move a list of pages in the address space of the currently executing |
860 | * process. | ||
861 | */ | ||
862 | asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
863 | const void __user * __user *pages, | ||
864 | const int __user *nodes, | ||
865 | int __user *status, int flags) | ||
866 | { | ||
867 | int err = 0; | ||
868 | int i; | ||
869 | struct task_struct *task; | ||
870 | nodemask_t task_nodes; | ||
871 | struct mm_struct *mm; | ||
872 | struct page_to_node *pm = NULL; | ||
547 | 873 | ||
548 | head = page_buffers(page); | 874 | /* Check flags */ |
875 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) | ||
876 | return -EINVAL; | ||
549 | 877 | ||
550 | rc = migrate_page_remove_references(newpage, page, 3); | 878 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) |
879 | return -EPERM; | ||
551 | 880 | ||
552 | if (rc) | 881 | /* Find the mm_struct */ |
553 | return rc; | 882 | read_lock(&tasklist_lock); |
883 | task = pid ? find_task_by_pid(pid) : current; | ||
884 | if (!task) { | ||
885 | read_unlock(&tasklist_lock); | ||
886 | return -ESRCH; | ||
887 | } | ||
888 | mm = get_task_mm(task); | ||
889 | read_unlock(&tasklist_lock); | ||
554 | 890 | ||
555 | bh = head; | 891 | if (!mm) |
556 | do { | 892 | return -EINVAL; |
557 | get_bh(bh); | ||
558 | lock_buffer(bh); | ||
559 | bh = bh->b_this_page; | ||
560 | 893 | ||
561 | } while (bh != head); | 894 | /* |
895 | * Check if this process has the right to modify the specified | ||
896 | * process. The right exists if the process has administrative | ||
897 | * capabilities, superuser privileges or the same | ||
898 | * userid as the target process. | ||
899 | */ | ||
900 | if ((current->euid != task->suid) && (current->euid != task->uid) && | ||
901 | (current->uid != task->suid) && (current->uid != task->uid) && | ||
902 | !capable(CAP_SYS_NICE)) { | ||
903 | err = -EPERM; | ||
904 | goto out2; | ||
905 | } | ||
562 | 906 | ||
563 | ClearPagePrivate(page); | 907 | err = security_task_movememory(task); |
564 | set_page_private(newpage, page_private(page)); | 908 | if (err) |
565 | set_page_private(page, 0); | 909 | goto out2; |
566 | put_page(page); | ||
567 | get_page(newpage); | ||
568 | 910 | ||
569 | bh = head; | ||
570 | do { | ||
571 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
572 | bh = bh->b_this_page; | ||
573 | 911 | ||
574 | } while (bh != head); | 912 | task_nodes = cpuset_mems_allowed(task); |
575 | 913 | ||
576 | SetPagePrivate(newpage); | 914 | /* Limit nr_pages so that the multiplication may not overflow */ |
915 | if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { | ||
916 | err = -E2BIG; | ||
917 | goto out2; | ||
918 | } | ||
577 | 919 | ||
578 | migrate_page_copy(newpage, page); | 920 | pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); |
921 | if (!pm) { | ||
922 | err = -ENOMEM; | ||
923 | goto out2; | ||
924 | } | ||
579 | 925 | ||
580 | bh = head; | 926 | /* |
581 | do { | 927 | * Get parameters from user space and initialize the pm |
582 | unlock_buffer(bh); | 928 | * array. Return various errors if the user did something wrong. |
583 | put_bh(bh); | 929 | */ |
584 | bh = bh->b_this_page; | 930 | for (i = 0; i < nr_pages; i++) { |
931 | const void *p; | ||
585 | 932 | ||
586 | } while (bh != head); | 933 | err = -EFAULT; |
934 | if (get_user(p, pages + i)) | ||
935 | goto out; | ||
587 | 936 | ||
588 | return 0; | 937 | pm[i].addr = (unsigned long)p; |
589 | } | 938 | if (nodes) { |
590 | EXPORT_SYMBOL(buffer_migrate_page); | 939 | int node; |
591 | 940 | ||
592 | /* | 941 | if (get_user(node, nodes + i)) |
593 | * Migrate the list 'pagelist' of pages to a certain destination. | 942 | goto out; |
594 | * | ||
595 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
596 | * Return the number of pages not migrated or error code | ||
597 | */ | ||
598 | int migrate_pages_to(struct list_head *pagelist, | ||
599 | struct vm_area_struct *vma, int dest) | ||
600 | { | ||
601 | LIST_HEAD(newlist); | ||
602 | LIST_HEAD(moved); | ||
603 | LIST_HEAD(failed); | ||
604 | int err = 0; | ||
605 | unsigned long offset = 0; | ||
606 | int nr_pages; | ||
607 | struct page *page; | ||
608 | struct list_head *p; | ||
609 | 943 | ||
610 | redo: | 944 | err = -ENODEV; |
611 | nr_pages = 0; | 945 | if (!node_online(node)) |
612 | list_for_each(p, pagelist) { | 946 | goto out; |
613 | if (vma) { | ||
614 | /* | ||
615 | * The address passed to alloc_page_vma is used to | ||
616 | * generate the proper interleave behavior. We fake | ||
617 | * the address here by an increasing offset in order | ||
618 | * to get the proper distribution of pages. | ||
619 | * | ||
620 | * No decision has been made as to which page | ||
621 | * a certain old page is moved to so we cannot | ||
622 | * specify the correct address. | ||
623 | */ | ||
624 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
625 | offset + vma->vm_start); | ||
626 | offset += PAGE_SIZE; | ||
627 | } | ||
628 | else | ||
629 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
630 | 947 | ||
631 | if (!page) { | 948 | err = -EACCES; |
632 | err = -ENOMEM; | 949 | if (!node_isset(node, task_nodes)) |
633 | goto out; | 950 | goto out; |
951 | |||
952 | pm[i].node = node; | ||
634 | } | 953 | } |
635 | list_add_tail(&page->lru, &newlist); | ||
636 | nr_pages++; | ||
637 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
638 | break; | ||
639 | } | 954 | } |
640 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | 955 | /* End marker */ |
956 | pm[nr_pages].node = MAX_NUMNODES; | ||
957 | |||
958 | if (nodes) | ||
959 | err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL); | ||
960 | else | ||
961 | err = do_pages_stat(mm, pm); | ||
641 | 962 | ||
642 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | 963 | if (err >= 0) |
964 | /* Return status information */ | ||
965 | for (i = 0; i < nr_pages; i++) | ||
966 | if (put_user(pm[i].status, status + i)) | ||
967 | err = -EFAULT; | ||
643 | 968 | ||
644 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
645 | goto redo; | ||
646 | out: | 969 | out: |
647 | /* Return leftover allocated pages */ | 970 | vfree(pm); |
648 | while (!list_empty(&newlist)) { | 971 | out2: |
649 | page = list_entry(newlist.next, struct page, lru); | 972 | mmput(mm); |
650 | list_del(&page->lru); | 973 | return err; |
651 | __free_page(page); | 974 | } |
652 | } | 975 | #endif |
653 | list_splice(&failed, pagelist); | 976 | |
654 | if (err < 0) | 977 | /* |
655 | return err; | 978 | * Call migration functions in the vma_ops that may prepare |
656 | 979 | * memory in a vm for migration. migration functions may perform | |
657 | /* Calculate number of leftover pages */ | 980 | * the migration for vmas that do not have an underlying page struct. |
658 | nr_pages = 0; | 981 | */ |
659 | list_for_each(p, pagelist) | 982 | int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, |
660 | nr_pages++; | 983 | const nodemask_t *from, unsigned long flags) |
661 | return nr_pages; | 984 | { |
985 | struct vm_area_struct *vma; | ||
986 | int err = 0; | ||
987 | |||
988 | for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { | ||
989 | if (vma->vm_ops && vma->vm_ops->migrate) { | ||
990 | err = vma->vm_ops->migrate(vma, to, from, flags); | ||
991 | if (err) | ||
992 | break; | ||
993 | } | ||
994 | } | ||
995 | return err; | ||
662 | } | 996 | } |
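The sys_move_pages() entry point added here is the kernel half of the move_pages(2) interface: user space passes an array of page addresses plus target nodes and gets per-page status back (the destination node on success, a negative errno otherwise). A minimal user-side sketch, assuming libnuma's <numaif.h> wrapper and linking with -lnuma; error handling is trimmed:

	#include <numaif.h>		/* move_pages() wrapper from libnuma */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		void *pages[1];
		int nodes[1] = { 0 };		/* requested target node */
		int status[1];

		if (posix_memalign(&pages[0], psz, psz))
			return 1;
		memset(pages[0], 0, psz);	/* touch it so the page exists */

		/* pid 0 means the calling process; MPOL_MF_MOVE only moves
		 * pages mapped by this process alone. */
		if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
			printf("page now on node %d\n", status[0]);
		return 0;
	}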