Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 1058 |
1 file changed, 687 insertions(+), 371 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c25040693d2..1c2a71aa05cd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/migrate.h> | 15 | #include <linux/migrate.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/swap.h> | 17 | #include <linux/swap.h> |
18 | #include <linux/swapops.h> | ||
18 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
19 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
20 | #include <linux/mm_inline.h> | 21 | #include <linux/mm_inline.h> |
@@ -23,13 +24,13 @@ | |||
23 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
24 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
25 | #include <linux/cpuset.h> | 26 | #include <linux/cpuset.h> |
26 | #include <linux/swapops.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/mempolicy.h> | ||
29 | #include <linux/vmalloc.h> | ||
30 | #include <linux/security.h> | ||
27 | 31 | ||
28 | #include "internal.h" | 32 | #include "internal.h" |
29 | 33 | ||
30 | /* The maximum number of pages to take off the LRU for migration */ | ||
31 | #define MIGRATE_CHUNK_SIZE 256 | ||
32 | |||
33 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 34 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
34 | 35 | ||
35 | /* | 36 | /* |
@@ -64,16 +65,11 @@ int isolate_lru_page(struct page *page, struct list_head *pagelist) | |||
64 | } | 65 | } |
65 | 66 | ||
66 | /* | 67 | /* |
67 | * migrate_prep() needs to be called after we have compiled the list of pages | 68 | * migrate_prep() needs to be called before we start compiling a list of pages |
68 | * to be migrated using isolate_lru_page() but before we begin a series of calls | 69 | * to be migrated using isolate_lru_page(). |
69 | * to migrate_pages(). | ||
70 | */ | 70 | */ |
71 | int migrate_prep(void) | 71 | int migrate_prep(void) |
72 | { | 72 | { |
73 | /* Must have swap device for migration */ | ||
74 | if (nr_swap_pages <= 0) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* | 73 | /* |
78 | * Clear the LRU lists so pages can be isolated. | 74 | * Clear the LRU lists so pages can be isolated. |
79 | * Note that pages may be moved off the LRU after we have | 75 | * Note that pages may be moved off the LRU after we have |
@@ -87,7 +83,6 @@ int migrate_prep(void) | |||
87 | 83 | ||
88 | static inline void move_to_lru(struct page *page) | 84 | static inline void move_to_lru(struct page *page) |
89 | { | 85 | { |
90 | list_del(&page->lru); | ||
91 | if (PageActive(page)) { | 86 | if (PageActive(page)) { |
92 | /* | 87 | /* |
93 | * lru_cache_add_active checks that | 88 | * lru_cache_add_active checks that |
@@ -113,113 +108,200 @@ int putback_lru_pages(struct list_head *l) | |||
113 | int count = 0; | 108 | int count = 0; |
114 | 109 | ||
115 | list_for_each_entry_safe(page, page2, l, lru) { | 110 | list_for_each_entry_safe(page, page2, l, lru) { |
111 | list_del(&page->lru); | ||
116 | move_to_lru(page); | 112 | move_to_lru(page); |
117 | count++; | 113 | count++; |
118 | } | 114 | } |
119 | return count; | 115 | return count; |
120 | } | 116 | } |
121 | 117 | ||
122 | /* | 118 | static inline int is_swap_pte(pte_t pte) |
123 | * Non migratable page | ||
124 | */ | ||
125 | int fail_migrate_page(struct page *newpage, struct page *page) | ||
126 | { | 119 | { |
127 | return -EIO; | 120 | return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); |
128 | } | 121 | } |
129 | EXPORT_SYMBOL(fail_migrate_page); | ||
130 | 122 | ||
131 | /* | 123 | /* |
132 | * swapout a single page | 124 | * Restore a potential migration pte to a working pte entry |
133 | * page is locked upon entry, unlocked on exit | ||
134 | */ | 125 | */ |
135 | static int swap_page(struct page *page) | 126 | static void remove_migration_pte(struct vm_area_struct *vma, |
127 | struct page *old, struct page *new) | ||
136 | { | 128 | { |
137 | struct address_space *mapping = page_mapping(page); | 129 | struct mm_struct *mm = vma->vm_mm; |
130 | swp_entry_t entry; | ||
131 | pgd_t *pgd; | ||
132 | pud_t *pud; | ||
133 | pmd_t *pmd; | ||
134 | pte_t *ptep, pte; | ||
135 | spinlock_t *ptl; | ||
136 | unsigned long addr = page_address_in_vma(new, vma); | ||
137 | |||
138 | if (addr == -EFAULT) | ||
139 | return; | ||
140 | |||
141 | pgd = pgd_offset(mm, addr); | ||
142 | if (!pgd_present(*pgd)) | ||
143 | return; | ||
144 | |||
145 | pud = pud_offset(pgd, addr); | ||
146 | if (!pud_present(*pud)) | ||
147 | return; | ||
148 | |||
149 | pmd = pmd_offset(pud, addr); | ||
150 | if (!pmd_present(*pmd)) | ||
151 | return; | ||
152 | |||
153 | ptep = pte_offset_map(pmd, addr); | ||
154 | |||
155 | if (!is_swap_pte(*ptep)) { | ||
156 | pte_unmap(ptep); | ||
157 | return; | ||
158 | } | ||
138 | 159 | ||
139 | if (page_mapped(page) && mapping) | 160 | ptl = pte_lockptr(mm, pmd); |
140 | if (try_to_unmap(page, 1) != SWAP_SUCCESS) | 161 | spin_lock(ptl); |
141 | goto unlock_retry; | 162 | pte = *ptep; |
163 | if (!is_swap_pte(pte)) | ||
164 | goto out; | ||
142 | 165 | ||
143 | if (PageDirty(page)) { | 166 | entry = pte_to_swp_entry(pte); |
144 | /* Page is dirty, try to write it out here */ | ||
145 | switch(pageout(page, mapping)) { | ||
146 | case PAGE_KEEP: | ||
147 | case PAGE_ACTIVATE: | ||
148 | goto unlock_retry; | ||
149 | 167 | ||
150 | case PAGE_SUCCESS: | 168 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) |
151 | goto retry; | 169 | goto out; |
152 | 170 | ||
153 | case PAGE_CLEAN: | 171 | get_page(new); |
154 | ; /* try to free the page below */ | 172 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
155 | } | 173 | if (is_write_migration_entry(entry)) |
156 | } | 174 | pte = pte_mkwrite(pte); |
175 | set_pte_at(mm, addr, ptep, pte); | ||
157 | 176 | ||
158 | if (PagePrivate(page)) { | 177 | if (PageAnon(new)) |
159 | if (!try_to_release_page(page, GFP_KERNEL) || | 178 | page_add_anon_rmap(new, vma, addr); |
160 | (!mapping && page_count(page) == 1)) | 179 | else |
161 | goto unlock_retry; | 180 | page_add_file_rmap(new); |
162 | } | ||
163 | 181 | ||
164 | if (remove_mapping(mapping, page)) { | 182 | /* No need to invalidate - it was non-present before */ |
165 | /* Success */ | 183 | update_mmu_cache(vma, addr, pte); |
166 | unlock_page(page); | 184 | lazy_mmu_prot_update(pte); |
167 | return 0; | ||
168 | } | ||
169 | 185 | ||
170 | unlock_retry: | 186 | out: |
171 | unlock_page(page); | 187 | pte_unmap_unlock(ptep, ptl); |
188 | } | ||
172 | 189 | ||
173 | retry: | 190 | /* |
174 | return -EAGAIN; | 191 | * Note that remove_file_migration_ptes will only work on regular mappings, |
192 | * Nonlinear mappings do not use migration entries. | ||
193 | */ | ||
194 | static void remove_file_migration_ptes(struct page *old, struct page *new) | ||
195 | { | ||
196 | struct vm_area_struct *vma; | ||
197 | struct address_space *mapping = page_mapping(new); | ||
198 | struct prio_tree_iter iter; | ||
199 | pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
200 | |||
201 | if (!mapping) | ||
202 | return; | ||
203 | |||
204 | spin_lock(&mapping->i_mmap_lock); | ||
205 | |||
206 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) | ||
207 | remove_migration_pte(vma, old, new); | ||
208 | |||
209 | spin_unlock(&mapping->i_mmap_lock); | ||
175 | } | 210 | } |
176 | 211 | ||
177 | /* | 212 | /* |
178 | * Remove references for a page and establish the new page with the correct | 213 | * Must hold mmap_sem lock on at least one of the vmas containing |
179 | * basic settings to be able to stop accesses to the page. | 214 | * the page so that the anon_vma cannot vanish. |
180 | */ | 215 | */ |
181 | int migrate_page_remove_references(struct page *newpage, | 216 | static void remove_anon_migration_ptes(struct page *old, struct page *new) |
182 | struct page *page, int nr_refs) | ||
183 | { | 217 | { |
184 | struct address_space *mapping = page_mapping(page); | 218 | struct anon_vma *anon_vma; |
185 | struct page **radix_pointer; | 219 | struct vm_area_struct *vma; |
220 | unsigned long mapping; | ||
186 | 221 | ||
187 | /* | 222 | mapping = (unsigned long)new->mapping; |
188 | * Avoid doing any of the following work if the page count | ||
189 | * indicates that the page is in use or truncate has removed | ||
190 | * the page. | ||
191 | */ | ||
192 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | ||
193 | return -EAGAIN; | ||
194 | 223 | ||
195 | /* | 224 | if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0) |
196 | * Establish swap ptes for anonymous pages or destroy pte | 225 | return; |
197 | * maps for files. | ||
198 | * | ||
199 | * In order to reestablish file backed mappings the fault handlers | ||
200 | * will take the radix tree_lock which may then be used to stop | ||
201 | * processses from accessing this page until the new page is ready. | ||
202 | * | ||
203 | * A process accessing via a swap pte (an anonymous page) will take a | ||
204 | * page_lock on the old page which will block the process until the | ||
205 | * migration attempt is complete. At that time the PageSwapCache bit | ||
206 | * will be examined. If the page was migrated then the PageSwapCache | ||
207 | * bit will be clear and the operation to retrieve the page will be | ||
208 | * retried which will find the new page in the radix tree. Then a new | ||
209 | * direct mapping may be generated based on the radix tree contents. | ||
210 | * | ||
211 | * If the page was not migrated then the PageSwapCache bit | ||
212 | * is still set and the operation may continue. | ||
213 | */ | ||
214 | if (try_to_unmap(page, 1) == SWAP_FAIL) | ||
215 | /* A vma has VM_LOCKED set -> permanent failure */ | ||
216 | return -EPERM; | ||
217 | 226 | ||
218 | /* | 227 | /* |
219 | * Give up if we were unable to remove all mappings. | 228 | * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. |
220 | */ | 229 | */ |
221 | if (page_mapcount(page)) | 230 | anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON); |
222 | return -EAGAIN; | 231 | spin_lock(&anon_vma->lock); |
232 | |||
233 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) | ||
234 | remove_migration_pte(vma, old, new); | ||
235 | |||
236 | spin_unlock(&anon_vma->lock); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Get rid of all migration entries and replace them by | ||
241 | * references to the indicated page. | ||
242 | */ | ||
243 | static void remove_migration_ptes(struct page *old, struct page *new) | ||
244 | { | ||
245 | if (PageAnon(new)) | ||
246 | remove_anon_migration_ptes(old, new); | ||
247 | else | ||
248 | remove_file_migration_ptes(old, new); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Something used the pte of a page under migration. We need to | ||
253 | * get to the page and wait until migration is finished. | ||
254 | * When we return from this function the fault will be retried. | ||
255 | * | ||
256 | * This function is called from do_swap_page(). | ||
257 | */ | ||
258 | void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | ||
259 | unsigned long address) | ||
260 | { | ||
261 | pte_t *ptep, pte; | ||
262 | spinlock_t *ptl; | ||
263 | swp_entry_t entry; | ||
264 | struct page *page; | ||
265 | |||
266 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
267 | pte = *ptep; | ||
268 | if (!is_swap_pte(pte)) | ||
269 | goto out; | ||
270 | |||
271 | entry = pte_to_swp_entry(pte); | ||
272 | if (!is_migration_entry(entry)) | ||
273 | goto out; | ||
274 | |||
275 | page = migration_entry_to_page(entry); | ||
276 | |||
277 | get_page(page); | ||
278 | pte_unmap_unlock(ptep, ptl); | ||
279 | wait_on_page_locked(page); | ||
280 | put_page(page); | ||
281 | return; | ||
282 | out: | ||
283 | pte_unmap_unlock(ptep, ptl); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * Replace the page in the mapping. | ||
288 | * | ||
289 | * The number of remaining references must be: | ||
290 | * 1 for anonymous pages without a mapping | ||
291 | * 2 for pages with a mapping | ||
292 | * 3 for pages with a mapping and PagePrivate set. | ||
293 | */ | ||
294 | static int migrate_page_move_mapping(struct address_space *mapping, | ||
295 | struct page *newpage, struct page *page) | ||
296 | { | ||
297 | struct page **radix_pointer; | ||
298 | |||
299 | if (!mapping) { | ||
300 | /* Anonymous page */ | ||
301 | if (page_count(page) != 1) | ||
302 | return -EAGAIN; | ||
303 | return 0; | ||
304 | } | ||
223 | 305 | ||
224 | write_lock_irq(&mapping->tree_lock); | 306 | write_lock_irq(&mapping->tree_lock); |
225 | 307 | ||
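For context, a minimal sketch (not part of this patch) of how a present pte is expected to be replaced by a migration entry before remove_migration_pte() later restores it. It uses the swapops.h helpers already referenced above; make_migration_entry() is assumed to come from the companion swapops.h changes:

	/* Turn a mapped pte into a migration entry that records the old page
	 * and whether the mapping was writable. remove_migration_pte()
	 * recognizes such an entry and points it at the new page instead. */
	swp_entry_t entry = make_migration_entry(page, pte_write(pteval));
	set_pte_at(mm, address, ptep, swp_entry_to_pte(entry));
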
@@ -227,7 +309,7 @@ int migrate_page_remove_references(struct page *newpage, | |||
227 | &mapping->page_tree, | 309 | &mapping->page_tree, |
228 | page_index(page)); | 310 | page_index(page)); |
229 | 311 | ||
230 | if (!page_mapping(page) || page_count(page) != nr_refs || | 312 | if (page_count(page) != 2 + !!PagePrivate(page) || |
231 | *radix_pointer != page) { | 313 | *radix_pointer != page) { |
232 | write_unlock_irq(&mapping->tree_lock); | 314 | write_unlock_irq(&mapping->tree_lock); |
233 | return -EAGAIN; | 315 | return -EAGAIN; |
@@ -235,19 +317,14 @@ int migrate_page_remove_references(struct page *newpage, | |||
235 | 317 | ||
236 | /* | 318 | /* |
237 | * Now we know that no one else is looking at the page. | 319 | * Now we know that no one else is looking at the page. |
238 | * | ||
239 | * Certain minimal information about a page must be available | ||
240 | * in order for other subsystems to properly handle the page if they | ||
241 | * find it through the radix tree update before we are finished | ||
242 | * copying the page. | ||
243 | */ | 320 | */ |
244 | get_page(newpage); | 321 | get_page(newpage); |
245 | newpage->index = page->index; | 322 | #ifdef CONFIG_SWAP |
246 | newpage->mapping = page->mapping; | ||
247 | if (PageSwapCache(page)) { | 323 | if (PageSwapCache(page)) { |
248 | SetPageSwapCache(newpage); | 324 | SetPageSwapCache(newpage); |
249 | set_page_private(newpage, page_private(page)); | 325 | set_page_private(newpage, page_private(page)); |
250 | } | 326 | } |
327 | #endif | ||
251 | 328 | ||
252 | *radix_pointer = newpage; | 329 | *radix_pointer = newpage; |
253 | __put_page(page); | 330 | __put_page(page); |
@@ -255,12 +332,11 @@ int migrate_page_remove_references(struct page *newpage, | |||
255 | 332 | ||
256 | return 0; | 333 | return 0; |
257 | } | 334 | } |
258 | EXPORT_SYMBOL(migrate_page_remove_references); | ||
259 | 335 | ||
260 | /* | 336 | /* |
261 | * Copy the page to its new location | 337 | * Copy the page to its new location |
262 | */ | 338 | */ |
263 | void migrate_page_copy(struct page *newpage, struct page *page) | 339 | static void migrate_page_copy(struct page *newpage, struct page *page) |
264 | { | 340 | { |
265 | copy_highpage(newpage, page); | 341 | copy_highpage(newpage, page); |
266 | 342 | ||
@@ -282,7 +358,9 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
282 | set_page_dirty(newpage); | 358 | set_page_dirty(newpage); |
283 | } | 359 | } |
284 | 360 | ||
361 | #ifdef CONFIG_SWAP | ||
285 | ClearPageSwapCache(page); | 362 | ClearPageSwapCache(page); |
363 | #endif | ||
286 | ClearPageActive(page); | 364 | ClearPageActive(page); |
287 | ClearPagePrivate(page); | 365 | ClearPagePrivate(page); |
288 | set_page_private(page, 0); | 366 | set_page_private(page, 0); |
@@ -295,7 +373,18 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
295 | if (PageWriteback(newpage)) | 373 | if (PageWriteback(newpage)) |
296 | end_page_writeback(newpage); | 374 | end_page_writeback(newpage); |
297 | } | 375 | } |
298 | EXPORT_SYMBOL(migrate_page_copy); | 376 | |
377 | /************************************************************ | ||
378 | * Migration functions | ||
379 | ***********************************************************/ | ||
380 | |||
381 | /* Always fail migration. Used for mappings that are not movable */ | ||
382 | int fail_migrate_page(struct address_space *mapping, | ||
383 | struct page *newpage, struct page *page) | ||
384 | { | ||
385 | return -EIO; | ||
386 | } | ||
387 | EXPORT_SYMBOL(fail_migrate_page); | ||
299 | 388 | ||
300 | /* | 389 | /* |
301 | * Common logic to directly migrate a single page suitable for | 390 | * Common logic to directly migrate a single page suitable for |
@@ -303,51 +392,286 @@ EXPORT_SYMBOL(migrate_page_copy); | |||
303 | * | 392 | * |
304 | * Pages are locked upon entry and exit. | 393 | * Pages are locked upon entry and exit. |
305 | */ | 394 | */ |
306 | int migrate_page(struct page *newpage, struct page *page) | 395 | int migrate_page(struct address_space *mapping, |
396 | struct page *newpage, struct page *page) | ||
307 | { | 397 | { |
308 | int rc; | 398 | int rc; |
309 | 399 | ||
310 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | 400 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ |
311 | 401 | ||
312 | rc = migrate_page_remove_references(newpage, page, 2); | 402 | rc = migrate_page_move_mapping(mapping, newpage, page); |
403 | |||
404 | if (rc) | ||
405 | return rc; | ||
406 | |||
407 | migrate_page_copy(newpage, page); | ||
408 | return 0; | ||
409 | } | ||
410 | EXPORT_SYMBOL(migrate_page); | ||
411 | |||
412 | /* | ||
413 | * Migration function for pages with buffers. This function can only be used | ||
414 | * if the underlying filesystem guarantees that no other references to "page" | ||
415 | * exist. | ||
416 | */ | ||
417 | int buffer_migrate_page(struct address_space *mapping, | ||
418 | struct page *newpage, struct page *page) | ||
419 | { | ||
420 | struct buffer_head *bh, *head; | ||
421 | int rc; | ||
422 | |||
423 | if (!page_has_buffers(page)) | ||
424 | return migrate_page(mapping, newpage, page); | ||
425 | |||
426 | head = page_buffers(page); | ||
427 | |||
428 | rc = migrate_page_move_mapping(mapping, newpage, page); | ||
313 | 429 | ||
314 | if (rc) | 430 | if (rc) |
315 | return rc; | 431 | return rc; |
316 | 432 | ||
433 | bh = head; | ||
434 | do { | ||
435 | get_bh(bh); | ||
436 | lock_buffer(bh); | ||
437 | bh = bh->b_this_page; | ||
438 | |||
439 | } while (bh != head); | ||
440 | |||
441 | ClearPagePrivate(page); | ||
442 | set_page_private(newpage, page_private(page)); | ||
443 | set_page_private(page, 0); | ||
444 | put_page(page); | ||
445 | get_page(newpage); | ||
446 | |||
447 | bh = head; | ||
448 | do { | ||
449 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
450 | bh = bh->b_this_page; | ||
451 | |||
452 | } while (bh != head); | ||
453 | |||
454 | SetPagePrivate(newpage); | ||
455 | |||
317 | migrate_page_copy(newpage, page); | 456 | migrate_page_copy(newpage, page); |
318 | 457 | ||
458 | bh = head; | ||
459 | do { | ||
460 | unlock_buffer(bh); | ||
461 | put_bh(bh); | ||
462 | bh = bh->b_this_page; | ||
463 | |||
464 | } while (bh != head); | ||
465 | |||
466 | return 0; | ||
467 | } | ||
468 | EXPORT_SYMBOL(buffer_migrate_page); | ||
469 | |||
470 | /* | ||
471 | * Writeback a page to clean the dirty state | ||
472 | */ | ||
473 | static int writeout(struct address_space *mapping, struct page *page) | ||
474 | { | ||
475 | struct writeback_control wbc = { | ||
476 | .sync_mode = WB_SYNC_NONE, | ||
477 | .nr_to_write = 1, | ||
478 | .range_start = 0, | ||
479 | .range_end = LLONG_MAX, | ||
480 | .nonblocking = 1, | ||
481 | .for_reclaim = 1 | ||
482 | }; | ||
483 | int rc; | ||
484 | |||
485 | if (!mapping->a_ops->writepage) | ||
486 | /* No write method for the address space */ | ||
487 | return -EINVAL; | ||
488 | |||
489 | if (!clear_page_dirty_for_io(page)) | ||
490 | /* Someone else already triggered a write */ | ||
491 | return -EAGAIN; | ||
492 | |||
319 | /* | 493 | /* |
320 | * Remove auxiliary swap entries and replace | 494 | * A dirty page may imply that the underlying filesystem has |
321 | * them with real ptes. | 495 | * the page on some queue. So the page must be clean for |
322 | * | 496 | * migration. Writeout may mean we lose the lock and the |
323 | * Note that a real pte entry will allow processes that are not | 497 | * page state is no longer what we checked for earlier. |
324 | * waiting on the page lock to use the new page via the page tables | 498 | * At this point we know that the migration attempt cannot |
325 | * before the new page is unlocked. | 499 | * be successful. |
326 | */ | 500 | */ |
327 | remove_from_swap(newpage); | 501 | remove_migration_ptes(page, page); |
328 | return 0; | 502 | |
503 | rc = mapping->a_ops->writepage(page, &wbc); | ||
504 | if (rc < 0) | ||
505 | /* I/O Error writing */ | ||
506 | return -EIO; | ||
507 | |||
508 | if (rc != AOP_WRITEPAGE_ACTIVATE) | ||
509 | /* unlocked. Relock */ | ||
510 | lock_page(page); | ||
511 | |||
512 | return -EAGAIN; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Default handling if a filesystem does not provide a migration function. | ||
517 | */ | ||
518 | static int fallback_migrate_page(struct address_space *mapping, | ||
519 | struct page *newpage, struct page *page) | ||
520 | { | ||
521 | if (PageDirty(page)) | ||
522 | return writeout(mapping, page); | ||
523 | |||
524 | /* | ||
525 | * Buffers may be managed in a filesystem specific way. | ||
526 | * We must have no buffers or drop them. | ||
527 | */ | ||
528 | if (page_has_buffers(page) && | ||
529 | !try_to_release_page(page, GFP_KERNEL)) | ||
530 | return -EAGAIN; | ||
531 | |||
532 | return migrate_page(mapping, newpage, page); | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Move a page to a newly allocated page | ||
537 | * The page is locked and all ptes have been successfully removed. | ||
538 | * | ||
539 | * The new page will have replaced the old page if this function | ||
540 | * is successful. | ||
541 | */ | ||
542 | static int move_to_new_page(struct page *newpage, struct page *page) | ||
543 | { | ||
544 | struct address_space *mapping; | ||
545 | int rc; | ||
546 | |||
547 | /* | ||
548 | * Block others from accessing the page when we get around to | ||
549 | * establishing additional references. We are the only one | ||
550 | * holding a reference to the new page at this point. | ||
551 | */ | ||
552 | if (TestSetPageLocked(newpage)) | ||
553 | BUG(); | ||
554 | |||
555 | /* Prepare mapping for the new page.*/ | ||
556 | newpage->index = page->index; | ||
557 | newpage->mapping = page->mapping; | ||
558 | |||
559 | mapping = page_mapping(page); | ||
560 | if (!mapping) | ||
561 | rc = migrate_page(mapping, newpage, page); | ||
562 | else if (mapping->a_ops->migratepage) | ||
563 | /* | ||
564 | * Most pages have a mapping and most filesystems | ||
565 | * should provide a migration function. Anonymous | ||
566 | * pages are part of swap space which also has its | ||
567 | * own migration function. This is the most common | ||
568 | * path for page migration. | ||
569 | */ | ||
570 | rc = mapping->a_ops->migratepage(mapping, | ||
571 | newpage, page); | ||
572 | else | ||
573 | rc = fallback_migrate_page(mapping, newpage, page); | ||
574 | |||
575 | if (!rc) | ||
576 | remove_migration_ptes(page, newpage); | ||
577 | else | ||
578 | newpage->mapping = NULL; | ||
579 | |||
580 | unlock_page(newpage); | ||
581 | |||
582 | return rc; | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * Obtain the lock on page, remove all ptes and migrate the page | ||
587 | * to the newly allocated page in newpage. | ||
588 | */ | ||
589 | static int unmap_and_move(new_page_t get_new_page, unsigned long private, | ||
590 | struct page *page, int force) | ||
591 | { | ||
592 | int rc = 0; | ||
593 | int *result = NULL; | ||
594 | struct page *newpage = get_new_page(page, private, &result); | ||
595 | |||
596 | if (!newpage) | ||
597 | return -ENOMEM; | ||
598 | |||
599 | if (page_count(page) == 1) | ||
600 | /* page was freed from under us. So we are done. */ | ||
601 | goto move_newpage; | ||
602 | |||
603 | rc = -EAGAIN; | ||
604 | if (TestSetPageLocked(page)) { | ||
605 | if (!force) | ||
606 | goto move_newpage; | ||
607 | lock_page(page); | ||
608 | } | ||
609 | |||
610 | if (PageWriteback(page)) { | ||
611 | if (!force) | ||
612 | goto unlock; | ||
613 | wait_on_page_writeback(page); | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * Establish migration ptes or remove ptes | ||
618 | */ | ||
619 | if (try_to_unmap(page, 1) != SWAP_FAIL) { | ||
620 | if (!page_mapped(page)) | ||
621 | rc = move_to_new_page(newpage, page); | ||
622 | } else | ||
623 | /* A vma has VM_LOCKED set -> permanent failure */ | ||
624 | rc = -EPERM; | ||
625 | |||
626 | if (rc) | ||
627 | remove_migration_ptes(page, page); | ||
628 | unlock: | ||
629 | unlock_page(page); | ||
630 | |||
631 | if (rc != -EAGAIN) { | ||
632 | /* | ||
633 | * A page that has been migrated has all references | ||
634 | * removed and will be freed. A page that has not been | ||
635 | * migrated will have kept its references and be | ||
636 | * restored. | ||
637 | */ | ||
638 | list_del(&page->lru); | ||
639 | move_to_lru(page); | ||
640 | } | ||
641 | |||
642 | move_newpage: | ||
643 | /* | ||
644 | * Move the new page to the LRU. If migration was not successful | ||
645 | * then this will free the page. | ||
646 | */ | ||
647 | move_to_lru(newpage); | ||
648 | if (result) { | ||
649 | if (rc) | ||
650 | *result = rc; | ||
651 | else | ||
652 | *result = page_to_nid(newpage); | ||
653 | } | ||
654 | return rc; | ||
329 | } | 655 | } |
330 | EXPORT_SYMBOL(migrate_page); | ||
331 | 656 | ||
332 | /* | 657 | /* |
333 | * migrate_pages | 658 | * migrate_pages |
334 | * | 659 | * |
335 | * Two lists are passed to this function. The first list | 660 | * The function takes one list of pages to migrate and a function |
336 | * contains the pages isolated from the LRU to be migrated. | 661 | * that determines from the page to be migrated and the private data |
337 | * The second list contains new pages that the pages isolated | 662 | * the target of the move and allocates the page. |
338 | * can be moved to. If the second list is NULL then all | ||
339 | * pages are swapped out. | ||
340 | * | 663 | * |
341 | * The function returns after 10 attempts or if no pages | 664 | * The function returns after 10 attempts or if no pages |
342 | * are movable anymore because to has become empty | 665 | * are movable anymore because to has become empty |
343 | * or no retryable pages exist anymore. | 666 | * or no retryable pages exist anymore. All pages will be |
667 | * returned to the LRU or freed. | ||
344 | * | 668 | * |
345 | * Return: Number of pages not migrated when "to" ran empty. | 669 | * Return: Number of pages not migrated or error code. |
346 | */ | 670 | */ |
347 | int migrate_pages(struct list_head *from, struct list_head *to, | 671 | int migrate_pages(struct list_head *from, |
348 | struct list_head *moved, struct list_head *failed) | 672 | new_page_t get_new_page, unsigned long private) |
349 | { | 673 | { |
350 | int retry; | 674 | int retry = 1; |
351 | int nr_failed = 0; | 675 | int nr_failed = 0; |
352 | int pass = 0; | 676 | int pass = 0; |
353 | struct page *page; | 677 | struct page *page; |
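As a usage note, the reworked migrate_pages() below takes a new_page_t allocation callback instead of a pre-built list of target pages. A hypothetical caller that always allocates the replacement page on a fixed node could look like this (names are illustrative, not from this patch):

	/* Allocator matching the new_page_t convention: "private" carries the
	 * target node; *result is left untouched, so no per-page status is
	 * recorded for this caller. */
	static struct page *new_page_fixed_node(struct page *page,
						unsigned long private,
						int **result)
	{
		return alloc_pages_node((int)private, GFP_HIGHUSER, 0);
	}

	/* ... then, with isolated pages on "pagelist": */
	nr_failed = migrate_pages(&pagelist, new_page_fixed_node, target_node);
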
@@ -358,305 +682,297 @@ int migrate_pages(struct list_head *from, struct list_head *to, | |||
358 | if (!swapwrite) | 682 | if (!swapwrite) |
359 | current->flags |= PF_SWAPWRITE; | 683 | current->flags |= PF_SWAPWRITE; |
360 | 684 | ||
361 | redo: | 685 | for(pass = 0; pass < 10 && retry; pass++) { |
362 | retry = 0; | 686 | retry = 0; |
687 | |||
688 | list_for_each_entry_safe(page, page2, from, lru) { | ||
689 | cond_resched(); | ||
690 | |||
691 | rc = unmap_and_move(get_new_page, private, | ||
692 | page, pass > 2); | ||
693 | |||
694 | switch(rc) { | ||
695 | case -ENOMEM: | ||
696 | goto out; | ||
697 | case -EAGAIN: | ||
698 | retry++; | ||
699 | break; | ||
700 | case 0: | ||
701 | break; | ||
702 | default: | ||
703 | /* Permanent failure */ | ||
704 | nr_failed++; | ||
705 | break; | ||
706 | } | ||
707 | } | ||
708 | } | ||
709 | rc = 0; | ||
710 | out: | ||
711 | if (!swapwrite) | ||
712 | current->flags &= ~PF_SWAPWRITE; | ||
363 | 713 | ||
364 | list_for_each_entry_safe(page, page2, from, lru) { | 714 | putback_lru_pages(from); |
365 | struct page *newpage = NULL; | ||
366 | struct address_space *mapping; | ||
367 | 715 | ||
368 | cond_resched(); | 716 | if (rc) |
717 | return rc; | ||
369 | 718 | ||
370 | rc = 0; | 719 | return nr_failed + retry; |
371 | if (page_count(page) == 1) | 720 | } |
372 | /* page was freed from under us. So we are done. */ | ||
373 | goto next; | ||
374 | 721 | ||
375 | if (to && list_empty(to)) | 722 | #ifdef CONFIG_NUMA |
376 | break; | 723 | /* |
724 | * Move a list of individual pages | ||
725 | */ | ||
726 | struct page_to_node { | ||
727 | unsigned long addr; | ||
728 | struct page *page; | ||
729 | int node; | ||
730 | int status; | ||
731 | }; | ||
377 | 732 | ||
378 | /* | 733 | static struct page *new_page_node(struct page *p, unsigned long private, |
379 | * Skip locked pages during the first two passes to give the | 734 | int **result) |
380 | * functions holding the lock time to release the page. Later we | 735 | { |
381 | * use lock_page() to have a higher chance of acquiring the | 736 | struct page_to_node *pm = (struct page_to_node *)private; |
382 | * lock. | ||
383 | */ | ||
384 | rc = -EAGAIN; | ||
385 | if (pass > 2) | ||
386 | lock_page(page); | ||
387 | else | ||
388 | if (TestSetPageLocked(page)) | ||
389 | goto next; | ||
390 | 737 | ||
391 | /* | 738 | while (pm->node != MAX_NUMNODES && pm->page != p) |
392 | * Only wait on writeback if we have already done a pass where | 739 | pm++; |
393 | * we we may have triggered writeouts for lots of pages. | ||
394 | */ | ||
395 | if (pass > 0) { | ||
396 | wait_on_page_writeback(page); | ||
397 | } else { | ||
398 | if (PageWriteback(page)) | ||
399 | goto unlock_page; | ||
400 | } | ||
401 | 740 | ||
402 | /* | 741 | if (pm->node == MAX_NUMNODES) |
403 | * Anonymous pages must have swap cache references otherwise | 742 | return NULL; |
404 | * the information contained in the page maps cannot be | ||
405 | * preserved. | ||
406 | */ | ||
407 | if (PageAnon(page) && !PageSwapCache(page)) { | ||
408 | if (!add_to_swap(page, GFP_KERNEL)) { | ||
409 | rc = -ENOMEM; | ||
410 | goto unlock_page; | ||
411 | } | ||
412 | } | ||
413 | 743 | ||
414 | if (!to) { | 744 | *result = &pm->status; |
415 | rc = swap_page(page); | ||
416 | goto next; | ||
417 | } | ||
418 | 745 | ||
419 | newpage = lru_to_page(to); | 746 | return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); |
420 | lock_page(newpage); | 747 | } |
421 | 748 | ||
422 | /* | 749 | /* |
423 | * Pages are properly locked and writeback is complete. | 750 | * Move a set of pages as indicated in the pm array. The addr |
424 | * Try to migrate the page. | 751 | * field must be set to the virtual address of the page to be moved |
425 | */ | 752 | * and the node number must contain a valid target node. |
426 | mapping = page_mapping(page); | 753 | */ |
427 | if (!mapping) | 754 | static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, |
428 | goto unlock_both; | 755 | int migrate_all) |
756 | { | ||
757 | int err; | ||
758 | struct page_to_node *pp; | ||
759 | LIST_HEAD(pagelist); | ||
429 | 760 | ||
430 | if (mapping->a_ops->migratepage) { | 761 | down_read(&mm->mmap_sem); |
431 | /* | ||
432 | * Most pages have a mapping and most filesystems | ||
433 | * should provide a migration function. Anonymous | ||
434 | * pages are part of swap space which also has its | ||
435 | * own migration function. This is the most common | ||
436 | * path for page migration. | ||
437 | */ | ||
438 | rc = mapping->a_ops->migratepage(newpage, page); | ||
439 | goto unlock_both; | ||
440 | } | ||
441 | |||
442 | /* Make sure the dirty bit is up to date */ | ||
443 | if (try_to_unmap(page, 1) == SWAP_FAIL) { | ||
444 | rc = -EPERM; | ||
445 | goto unlock_both; | ||
446 | } | ||
447 | 762 | ||
448 | if (page_mapcount(page)) { | 763 | /* |
449 | rc = -EAGAIN; | 764 | * Build a list of pages to migrate |
450 | goto unlock_both; | 765 | */ |
451 | } | 766 | migrate_prep(); |
767 | for (pp = pm; pp->node != MAX_NUMNODES; pp++) { | ||
768 | struct vm_area_struct *vma; | ||
769 | struct page *page; | ||
452 | 770 | ||
453 | /* | 771 | /* |
454 | * Default handling if a filesystem does not provide | 772 | * A valid page pointer that will not match any of the |
455 | * a migration function. We can only migrate clean | 773 | * pages that will be moved. |
456 | * pages so try to write out any dirty pages first. | ||
457 | */ | 774 | */ |
458 | if (PageDirty(page)) { | 775 | pp->page = ZERO_PAGE(0); |
459 | switch (pageout(page, mapping)) { | ||
460 | case PAGE_KEEP: | ||
461 | case PAGE_ACTIVATE: | ||
462 | goto unlock_both; | ||
463 | |||
464 | case PAGE_SUCCESS: | ||
465 | unlock_page(newpage); | ||
466 | goto next; | ||
467 | |||
468 | case PAGE_CLEAN: | ||
469 | ; /* try to migrate the page below */ | ||
470 | } | ||
471 | } | ||
472 | 776 | ||
473 | /* | 777 | err = -EFAULT; |
474 | * Buffers are managed in a filesystem specific way. | 778 | vma = find_vma(mm, pp->addr); |
475 | * We must have no buffers or drop them. | 779 | if (!vma) |
476 | */ | 780 | goto set_status; |
477 | if (!page_has_buffers(page) || | ||
478 | try_to_release_page(page, GFP_KERNEL)) { | ||
479 | rc = migrate_page(newpage, page); | ||
480 | goto unlock_both; | ||
481 | } | ||
482 | 781 | ||
483 | /* | 782 | page = follow_page(vma, pp->addr, FOLL_GET); |
484 | * On early passes with mapped pages simply | 783 | err = -ENOENT; |
485 | * retry. There may be a lock held for some | 784 | if (!page) |
486 | * buffers that may go away. Later | 785 | goto set_status; |
487 | * swap them out. | 786 | |
488 | */ | 787 | if (PageReserved(page)) /* Check for zero page */ |
489 | if (pass > 4) { | 788 | goto put_and_set; |
789 | |||
790 | pp->page = page; | ||
791 | err = page_to_nid(page); | ||
792 | |||
793 | if (err == pp->node) | ||
490 | /* | 794 | /* |
491 | * Persistently unable to drop buffers..... As a | 795 | * Node already in the right place |
492 | * measure of last resort we fall back to | ||
493 | * swap_page(). | ||
494 | */ | 796 | */ |
495 | unlock_page(newpage); | 797 | goto put_and_set; |
496 | newpage = NULL; | ||
497 | rc = swap_page(page); | ||
498 | goto next; | ||
499 | } | ||
500 | 798 | ||
501 | unlock_both: | 799 | err = -EACCES; |
502 | unlock_page(newpage); | 800 | if (page_mapcount(page) > 1 && |
503 | 801 | !migrate_all) | |
504 | unlock_page: | 802 | goto put_and_set; |
505 | unlock_page(page); | 803 | |
506 | 804 | err = isolate_lru_page(page, &pagelist); | |
507 | next: | 805 | put_and_set: |
508 | if (rc == -EAGAIN) { | 806 | /* |
509 | retry++; | 807 | * Either remove the duplicate refcount from |
510 | } else if (rc) { | 808 | * isolate_lru_page() or drop the page ref if it was |
511 | /* Permanent failure */ | 809 | * not isolated. |
512 | list_move(&page->lru, failed); | 810 | */ |
513 | nr_failed++; | 811 | put_page(page); |
514 | } else { | 812 | set_status: |
515 | if (newpage) { | 813 | pp->status = err; |
516 | /* Successful migration. Return page to LRU */ | ||
517 | move_to_lru(newpage); | ||
518 | } | ||
519 | list_move(&page->lru, moved); | ||
520 | } | ||
521 | } | 814 | } |
522 | if (retry && pass++ < 10) | ||
523 | goto redo; | ||
524 | 815 | ||
525 | if (!swapwrite) | 816 | if (!list_empty(&pagelist)) |
526 | current->flags &= ~PF_SWAPWRITE; | 817 | err = migrate_pages(&pagelist, new_page_node, |
818 | (unsigned long)pm); | ||
819 | else | ||
820 | err = -ENOENT; | ||
527 | 821 | ||
528 | return nr_failed + retry; | 822 | up_read(&mm->mmap_sem); |
823 | return err; | ||
529 | } | 824 | } |
530 | 825 | ||
531 | /* | 826 | /* |
532 | * Migration function for pages with buffers. This function can only be used | 827 | * Determine the nodes of a list of pages. The addr in the pm array |
533 | * if the underlying filesystem guarantees that no other references to "page" | 828 | * must have been set to the virtual address of which we want to determine |
534 | * exist. | 829 | * the node number. |
535 | */ | 830 | */ |
536 | int buffer_migrate_page(struct page *newpage, struct page *page) | 831 | static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) |
537 | { | 832 | { |
538 | struct address_space *mapping = page->mapping; | 833 | down_read(&mm->mmap_sem); |
539 | struct buffer_head *bh, *head; | 834 | |
540 | int rc; | 835 | for ( ; pm->node != MAX_NUMNODES; pm++) { |
836 | struct vm_area_struct *vma; | ||
837 | struct page *page; | ||
838 | int err; | ||
839 | |||
840 | err = -EFAULT; | ||
841 | vma = find_vma(mm, pm->addr); | ||
842 | if (!vma) | ||
843 | goto set_status; | ||
844 | |||
845 | page = follow_page(vma, pm->addr, 0); | ||
846 | err = -ENOENT; | ||
847 | /* Use PageReserved to check for zero page */ | ||
848 | if (!page || PageReserved(page)) | ||
849 | goto set_status; | ||
850 | |||
851 | err = page_to_nid(page); | ||
852 | set_status: | ||
853 | pm->status = err; | ||
854 | } | ||
541 | 855 | ||
542 | if (!mapping) | 856 | up_read(&mm->mmap_sem); |
543 | return -EAGAIN; | 857 | return 0; |
858 | } | ||
544 | 859 | ||
545 | if (!page_has_buffers(page)) | 860 | /* |
546 | return migrate_page(newpage, page); | 861 | * Move a list of pages in the address space of the currently executing |
862 | * process. | ||
863 | */ | ||
864 | asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
865 | const void __user * __user *pages, | ||
866 | const int __user *nodes, | ||
867 | int __user *status, int flags) | ||
868 | { | ||
869 | int err = 0; | ||
870 | int i; | ||
871 | struct task_struct *task; | ||
872 | nodemask_t task_nodes; | ||
873 | struct mm_struct *mm; | ||
874 | struct page_to_node *pm = NULL; | ||
547 | 875 | ||
548 | head = page_buffers(page); | 876 | /* Check flags */ |
877 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) | ||
878 | return -EINVAL; | ||
549 | 879 | ||
550 | rc = migrate_page_remove_references(newpage, page, 3); | 880 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) |
881 | return -EPERM; | ||
551 | 882 | ||
552 | if (rc) | 883 | /* Find the mm_struct */ |
553 | return rc; | 884 | read_lock(&tasklist_lock); |
885 | task = pid ? find_task_by_pid(pid) : current; | ||
886 | if (!task) { | ||
887 | read_unlock(&tasklist_lock); | ||
888 | return -ESRCH; | ||
889 | } | ||
890 | mm = get_task_mm(task); | ||
891 | read_unlock(&tasklist_lock); | ||
554 | 892 | ||
555 | bh = head; | 893 | if (!mm) |
556 | do { | 894 | return -EINVAL; |
557 | get_bh(bh); | ||
558 | lock_buffer(bh); | ||
559 | bh = bh->b_this_page; | ||
560 | 895 | ||
561 | } while (bh != head); | 896 | /* |
897 | * Check if this process has the right to modify the specified | ||
898 | * process. The right exists if the process has administrative | ||
899 | * capabilities, superuser privileges or the same | ||
900 | * userid as the target process. | ||
901 | */ | ||
902 | if ((current->euid != task->suid) && (current->euid != task->uid) && | ||
903 | (current->uid != task->suid) && (current->uid != task->uid) && | ||
904 | !capable(CAP_SYS_NICE)) { | ||
905 | err = -EPERM; | ||
906 | goto out2; | ||
907 | } | ||
562 | 908 | ||
563 | ClearPagePrivate(page); | 909 | err = security_task_movememory(task); |
564 | set_page_private(newpage, page_private(page)); | 910 | if (err) |
565 | set_page_private(page, 0); | 911 | goto out2; |
566 | put_page(page); | ||
567 | get_page(newpage); | ||
568 | 912 | ||
569 | bh = head; | ||
570 | do { | ||
571 | set_bh_page(bh, newpage, bh_offset(bh)); | ||
572 | bh = bh->b_this_page; | ||
573 | 913 | ||
574 | } while (bh != head); | 914 | task_nodes = cpuset_mems_allowed(task); |
575 | 915 | ||
576 | SetPagePrivate(newpage); | 916 | /* Limit nr_pages so that the multiplication may not overflow */ |
917 | if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { | ||
918 | err = -E2BIG; | ||
919 | goto out2; | ||
920 | } | ||
577 | 921 | ||
578 | migrate_page_copy(newpage, page); | 922 | pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); |
923 | if (!pm) { | ||
924 | err = -ENOMEM; | ||
925 | goto out2; | ||
926 | } | ||
579 | 927 | ||
580 | bh = head; | 928 | /* |
581 | do { | 929 | * Get parameters from user space and initialize the pm |
582 | unlock_buffer(bh); | 930 | * array. Return various errors if the user did something wrong. |
583 | put_bh(bh); | 931 | */ |
584 | bh = bh->b_this_page; | 932 | for (i = 0; i < nr_pages; i++) { |
933 | const void *p; | ||
585 | 934 | ||
586 | } while (bh != head); | 935 | err = -EFAULT; |
936 | if (get_user(p, pages + i)) | ||
937 | goto out; | ||
587 | 938 | ||
588 | return 0; | 939 | pm[i].addr = (unsigned long)p; |
589 | } | 940 | if (nodes) { |
590 | EXPORT_SYMBOL(buffer_migrate_page); | 941 | int node; |
591 | 942 | ||
592 | /* | 943 | if (get_user(node, nodes + i)) |
593 | * Migrate the list 'pagelist' of pages to a certain destination. | 944 | goto out; |
594 | * | ||
595 | * Specify destination with either non-NULL vma or dest_node >= 0 | ||
596 | * Return the number of pages not migrated or error code | ||
597 | */ | ||
598 | int migrate_pages_to(struct list_head *pagelist, | ||
599 | struct vm_area_struct *vma, int dest) | ||
600 | { | ||
601 | LIST_HEAD(newlist); | ||
602 | LIST_HEAD(moved); | ||
603 | LIST_HEAD(failed); | ||
604 | int err = 0; | ||
605 | unsigned long offset = 0; | ||
606 | int nr_pages; | ||
607 | struct page *page; | ||
608 | struct list_head *p; | ||
609 | 945 | ||
610 | redo: | 946 | err = -ENODEV; |
611 | nr_pages = 0; | 947 | if (!node_online(node)) |
612 | list_for_each(p, pagelist) { | 948 | goto out; |
613 | if (vma) { | ||
614 | /* | ||
615 | * The address passed to alloc_page_vma is used to | ||
616 | * generate the proper interleave behavior. We fake | ||
617 | * the address here by an increasing offset in order | ||
618 | * to get the proper distribution of pages. | ||
619 | * | ||
620 | * No decision has been made as to which page | ||
621 | * a certain old page is moved to so we cannot | ||
622 | * specify the correct address. | ||
623 | */ | ||
624 | page = alloc_page_vma(GFP_HIGHUSER, vma, | ||
625 | offset + vma->vm_start); | ||
626 | offset += PAGE_SIZE; | ||
627 | } | ||
628 | else | ||
629 | page = alloc_pages_node(dest, GFP_HIGHUSER, 0); | ||
630 | 949 | ||
631 | if (!page) { | 950 | err = -EACCES; |
632 | err = -ENOMEM; | 951 | if (!node_isset(node, task_nodes)) |
633 | goto out; | 952 | goto out; |
953 | |||
954 | pm[i].node = node; | ||
634 | } | 955 | } |
635 | list_add_tail(&page->lru, &newlist); | ||
636 | nr_pages++; | ||
637 | if (nr_pages > MIGRATE_CHUNK_SIZE) | ||
638 | break; | ||
639 | } | 956 | } |
640 | err = migrate_pages(pagelist, &newlist, &moved, &failed); | 957 | /* End marker */ |
958 | pm[nr_pages].node = MAX_NUMNODES; | ||
959 | |||
960 | if (nodes) | ||
961 | err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL); | ||
962 | else | ||
963 | err = do_pages_stat(mm, pm); | ||
641 | 964 | ||
642 | putback_lru_pages(&moved); /* Call release pages instead ?? */ | 965 | if (err >= 0) |
966 | /* Return status information */ | ||
967 | for (i = 0; i < nr_pages; i++) | ||
968 | if (put_user(pm[i].status, status + i)) | ||
969 | err = -EFAULT; | ||
643 | 970 | ||
644 | if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) | ||
645 | goto redo; | ||
646 | out: | 971 | out: |
647 | /* Return leftover allocated pages */ | 972 | vfree(pm); |
648 | while (!list_empty(&newlist)) { | 973 | out2: |
649 | page = list_entry(newlist.next, struct page, lru); | 974 | mmput(mm); |
650 | list_del(&page->lru); | 975 | return err; |
651 | __free_page(page); | ||
652 | } | ||
653 | list_splice(&failed, pagelist); | ||
654 | if (err < 0) | ||
655 | return err; | ||
656 | |||
657 | /* Calculate number of leftover pages */ | ||
658 | nr_pages = 0; | ||
659 | list_for_each(p, pagelist) | ||
660 | nr_pages++; | ||
661 | return nr_pages; | ||
662 | } | 976 | } |
977 | #endif | ||
978 | |||
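For completeness, a sketch of how the new sys_move_pages interface is meant to be driven from user space, here through the move_pages(3) wrapper provided by libnuma (assumed available; link with -lnuma):

	#include <numaif.h>	/* move_pages(), MPOL_MF_MOVE */
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		long page_size = sysconf(_SC_PAGESIZE);
		void *pages[1];
		int nodes[1] = { 1 };	/* request a move to node 1 */
		int status[1];

		pages[0] = malloc(page_size);
		((char *)pages[0])[0] = 1;	/* touch it so the page is populated */

		/* pid 0 means the calling process; MPOL_MF_MOVE only moves pages
		 * mapped by this process alone. */
		if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) < 0)
			perror("move_pages");
		else
			printf("status[0] = %d (node on success, -errno on failure)\n",
			       status[0]);
		return 0;
	}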