Diffstat (limited to 'mm/migrate.c')
-rw-r--r--   mm/migrate.c   127
1 file changed, 100 insertions(+), 27 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 96b9546e69e0..b5000d463893 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -24,6 +24,7 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/writeback.h>
 
 #include "internal.h"
 
@@ -123,7 +124,7 @@ static inline int is_swap_pte(pte_t pte) | |||
123 | /* | 124 | /* |
124 | * Restore a potential migration pte to a working pte entry | 125 | * Restore a potential migration pte to a working pte entry |
125 | */ | 126 | */ |
126 | static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, | 127 | static void remove_migration_pte(struct vm_area_struct *vma, |
127 | struct page *old, struct page *new) | 128 | struct page *old, struct page *new) |
128 | { | 129 | { |
129 | struct mm_struct *mm = vma->vm_mm; | 130 | struct mm_struct *mm = vma->vm_mm; |
@@ -133,6 +134,10 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, | |||
133 | pmd_t *pmd; | 134 | pmd_t *pmd; |
134 | pte_t *ptep, pte; | 135 | pte_t *ptep, pte; |
135 | spinlock_t *ptl; | 136 | spinlock_t *ptl; |
137 | unsigned long addr = page_address_in_vma(new, vma); | ||
138 | |||
139 | if (addr == -EFAULT) | ||
140 | return; | ||
136 | 141 | ||
137 | pgd = pgd_offset(mm, addr); | 142 | pgd = pgd_offset(mm, addr); |
138 | if (!pgd_present(*pgd)) | 143 | if (!pgd_present(*pgd)) |
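For context, the migration pte that this function restores is installed on the unmap side of the migration sequence. A minimal sketch of that counterpart, assuming the try_to_unmap_one() code in mm/rmap.c from the same kernel generation (variable names illustrative; not part of this diff):

        /*
         * Sketch: installing a migration entry when the page is unmapped.
         * remove_migration_pte() above reverses this once migration ends.
         */
        swp_entry_t entry = make_migration_entry(page, pte_write(pteval));
        set_pte_at(mm, address, ptep, swp_entry_to_pte(entry));
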
@@ -169,19 +174,47 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
        if (is_write_migration_entry(entry))
                pte = pte_mkwrite(pte);
        set_pte_at(mm, addr, ptep, pte);
-       page_add_anon_rmap(new, vma, addr);
+
+       if (PageAnon(new))
+               page_add_anon_rmap(new, vma, addr);
+       else
+               page_add_file_rmap(new);
+
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, addr, pte);
+       lazy_mmu_prot_update(pte);
+
 out:
        pte_unmap_unlock(ptep, ptl);
 }
 
 /*
- * Get rid of all migration entries and replace them by
- * references to the indicated page.
- *
+ * Note that remove_file_migration_ptes will only work on regular mappings;
+ * nonlinear mappings do not use migration entries.
+ */
+static void remove_file_migration_ptes(struct page *old, struct page *new)
+{
+       struct vm_area_struct *vma;
+       struct address_space *mapping = page_mapping(new);
+       struct prio_tree_iter iter;
+       pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+       if (!mapping)
+               return;
+
+       spin_lock(&mapping->i_mmap_lock);
+
+       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
+               remove_migration_pte(vma, old, new);
+
+       spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
  * Must hold mmap_sem lock on at least one of the vmas containing
  * the page so that the anon_vma cannot vanish.
  */
-static void remove_migration_ptes(struct page *old, struct page *new)
+static void remove_anon_migration_ptes(struct page *old, struct page *new)
 {
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
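The prio tree walk above visits every vma that maps the file offset of the page. Inside remove_migration_pte(), page_address_in_vma() turns that offset into a per-vma virtual address using the standard linear-mapping formula. A minimal sketch of the equivalent calculation (file_page_address() is a hypothetical helper for illustration; the real work is done by vma_address() in mm/rmap.c):

        static unsigned long file_page_address(struct page *page,
                                        struct vm_area_struct *vma)
        {
                pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
                unsigned long addr = vma->vm_start +
                                ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

                /* The page may lie outside this particular mapping */
                if (addr < vma->vm_start || addr >= vma->vm_end)
                        return -EFAULT;
                return addr;
        }
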
@@ -199,13 +232,24 @@ static void remove_migration_ptes(struct page *old, struct page *new)
        spin_lock(&anon_vma->lock);
 
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-               remove_migration_pte(vma, page_address_in_vma(new, vma),
-                                                               old, new);
+               remove_migration_pte(vma, old, new);
 
        spin_unlock(&anon_vma->lock);
 }
 
 /*
+ * Get rid of all migration entries and replace them by
+ * references to the indicated page.
+ */
+static void remove_migration_ptes(struct page *old, struct page *new)
+{
+       if (PageAnon(new))
+               remove_anon_migration_ptes(old, new);
+       else
+               remove_file_migration_ptes(old, new);
+}
+
+/*
  * Something used the pte of a page under migration. We need to
  * get to the page and wait until migration is finished.
  * When we return from this function the fault will be retried.
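For orientation, a simplified sketch of how the migration core of this series consumes the new dispatcher (assumed from unmap_and_move() and move_to_new_page() in mm/migrate.c; not part of this hunk). On failure the migration entries are pointed back at the old page; on success they are pointed at the new one:

        rc = move_to_new_page(newpage, page);
        if (rc)
                /* Migration failed: make the old page usable again */
                remove_migration_ptes(page, page);
        /* On success, move_to_new_page() has already called
           remove_migration_ptes(page, newpage). */
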
@@ -424,30 +468,59 @@ int buffer_migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(buffer_migrate_page);
 
-static int fallback_migrate_page(struct address_space *mapping,
-       struct page *newpage, struct page *page)
+/*
+ * Writeback a page to clean the dirty state
+ */
+static int writeout(struct address_space *mapping, struct page *page)
 {
+       struct writeback_control wbc = {
+               .sync_mode = WB_SYNC_NONE,
+               .nr_to_write = 1,
+               .range_start = 0,
+               .range_end = LLONG_MAX,
+               .nonblocking = 1,
+               .for_reclaim = 1
+       };
+       int rc;
+
+       if (!mapping->a_ops->writepage)
+               /* No write method for the address space */
+               return -EINVAL;
+
+       if (!clear_page_dirty_for_io(page))
+               /* Someone else already triggered a write */
+               return -EAGAIN;
+
        /*
-        * Default handling if a filesystem does not provide
-        * a migration function. We can only migrate clean
-        * pages so try to write out any dirty pages first.
+        * A dirty page may imply that the underlying filesystem has
+        * the page on some queue. So the page must be clean for
+        * migration. Writeout may mean we lose the lock and the
+        * page state is no longer what we checked for earlier.
+        * At this point we know that the migration attempt cannot
+        * be successful.
         */
-       if (PageDirty(page)) {
-               switch (pageout(page, mapping)) {
-               case PAGE_KEEP:
-               case PAGE_ACTIVATE:
-                       return -EAGAIN;
+       remove_migration_ptes(page, page);
 
-               case PAGE_SUCCESS:
-                       /* Relock since we lost the lock */
-                       lock_page(page);
-                       /* Must retry since page state may have changed */
-                       return -EAGAIN;
+       rc = mapping->a_ops->writepage(page, &wbc);
+       if (rc < 0)
+               /* I/O error writing */
+               return -EIO;
 
-               case PAGE_CLEAN:
-                       ; /* try to migrate the page below */
-               }
-       }
+       if (rc != AOP_WRITEPAGE_ACTIVATE)
+               /* ->writepage() unlocked the page; relock it */
+               lock_page(page);
+
+       return -EAGAIN;
+}
+
+/*
+ * Default handling if a filesystem does not provide a migration function.
+ */
+static int fallback_migrate_page(struct address_space *mapping,
+       struct page *newpage, struct page *page)
+{
+       if (PageDirty(page))
+               return writeout(mapping, page);
 
        /*
         * Buffers may be managed in a filesystem specific way.
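Note that writeout() returns -EAGAIN even when the write was started successfully: the page lock may have been dropped and the page state must be revalidated before another migration attempt. A sketch of the retry loop in migrate_pages() that consumes this return value (simplified from the same patch series; details assumed):

        for (pass = 0; pass < 10 && retry; pass++) {
                retry = 0;
                list_for_each_entry_safe(page, page2, from, lru) {
                        cond_resched();
                        rc = unmap_and_move(get_new_page, private,
                                                page, pass > 2);
                        if (rc == -EAGAIN)
                                retry++;        /* revisit on a later pass */
                }
        }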