diff options
-rw-r--r-- | include/linux/swap.h | 15 | ||||
-rw-r--r-- | mm/migrate.c | 127 | ||||
-rw-r--r-- | mm/rmap.c | 11 | ||||
-rw-r--r-- | mm/vmscan.c | 14 |
4 files changed, 124 insertions, 43 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h index 7cee73ef4f15..1cf234e8df55 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -186,20 +186,6 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages); | |||
186 | extern int vm_swappiness; | 186 | extern int vm_swappiness; |
187 | extern int remove_mapping(struct address_space *mapping, struct page *page); | 187 | extern int remove_mapping(struct address_space *mapping, struct page *page); |
188 | 188 | ||
189 | /* possible outcome of pageout() */ | ||
190 | typedef enum { | ||
191 | /* failed to write page out, page is locked */ | ||
192 | PAGE_KEEP, | ||
193 | /* move page to the active list, page is locked */ | ||
194 | PAGE_ACTIVATE, | ||
195 | /* page has been sent to the disk successfully, page is unlocked */ | ||
196 | PAGE_SUCCESS, | ||
197 | /* page is clean and locked */ | ||
198 | PAGE_CLEAN, | ||
199 | } pageout_t; | ||
200 | |||
201 | extern pageout_t pageout(struct page *page, struct address_space *mapping); | ||
202 | |||
203 | #ifdef CONFIG_NUMA | 189 | #ifdef CONFIG_NUMA |
204 | extern int zone_reclaim_mode; | 190 | extern int zone_reclaim_mode; |
205 | extern int zone_reclaim_interval; | 191 | extern int zone_reclaim_interval; |
@@ -259,7 +245,6 @@ extern int remove_exclusive_swap_page(struct page *); | |||
259 | struct backing_dev_info; | 245 | struct backing_dev_info; |
260 | 246 | ||
261 | extern spinlock_t swap_lock; | 247 | extern spinlock_t swap_lock; |
262 | extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page); | ||
263 | 248 | ||
264 | /* linux/mm/thrash.c */ | 249 | /* linux/mm/thrash.c */ |
265 | extern struct mm_struct * swap_token_mm; | 250 | extern struct mm_struct * swap_token_mm; |
diff --git a/mm/migrate.c b/mm/migrate.c index 96b9546e69e0..b5000d463893 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/cpuset.h> | 26 | #include <linux/cpuset.h> |
27 | #include <linux/writeback.h> | ||
27 | 28 | ||
28 | #include "internal.h" | 29 | #include "internal.h" |
29 | 30 | ||
@@ -123,7 +124,7 @@ static inline int is_swap_pte(pte_t pte) | |||
123 | /* | 124 | /* |
124 | * Restore a potential migration pte to a working pte entry | 125 | * Restore a potential migration pte to a working pte entry |
125 | */ | 126 | */ |
126 | static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, | 127 | static void remove_migration_pte(struct vm_area_struct *vma, |
127 | struct page *old, struct page *new) | 128 | struct page *old, struct page *new) |
128 | { | 129 | { |
129 | struct mm_struct *mm = vma->vm_mm; | 130 | struct mm_struct *mm = vma->vm_mm; |
@@ -133,6 +134,10 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, | |||
133 | pmd_t *pmd; | 134 | pmd_t *pmd; |
134 | pte_t *ptep, pte; | 135 | pte_t *ptep, pte; |
135 | spinlock_t *ptl; | 136 | spinlock_t *ptl; |
137 | unsigned long addr = page_address_in_vma(new, vma); | ||
138 | |||
139 | if (addr == -EFAULT) | ||
140 | return; | ||
136 | 141 | ||
137 | pgd = pgd_offset(mm, addr); | 142 | pgd = pgd_offset(mm, addr); |
138 | if (!pgd_present(*pgd)) | 143 | if (!pgd_present(*pgd)) |
@@ -169,19 +174,47 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, | |||
169 | if (is_write_migration_entry(entry)) | 174 | if (is_write_migration_entry(entry)) |
170 | pte = pte_mkwrite(pte); | 175 | pte = pte_mkwrite(pte); |
171 | set_pte_at(mm, addr, ptep, pte); | 176 | set_pte_at(mm, addr, ptep, pte); |
172 | page_add_anon_rmap(new, vma, addr); | 177 | |
178 | if (PageAnon(new)) | ||
179 | page_add_anon_rmap(new, vma, addr); | ||
180 | else | ||
181 | page_add_file_rmap(new); | ||
182 | |||
183 | /* No need to invalidate - it was non-present before */ | ||
184 | update_mmu_cache(vma, addr, pte); | ||
185 | lazy_mmu_prot_update(pte); | ||
186 | |||
173 | out: | 187 | out: |
174 | pte_unmap_unlock(ptep, ptl); | 188 | pte_unmap_unlock(ptep, ptl); |
175 | } | 189 | } |
176 | 190 | ||
177 | /* | 191 | /* |
178 | * Get rid of all migration entries and replace them by | 192 | * Note that remove_file_migration_ptes will only work on regular mappings, |
179 | * references to the indicated page. | 193 | * Nonlinear mappings do not use migration entries. |
180 | * | 194 | */ |
195 | static void remove_file_migration_ptes(struct page *old, struct page *new) | ||
196 | { | ||
197 | struct vm_area_struct *vma; | ||
198 | struct address_space *mapping = page_mapping(new); | ||
199 | struct prio_tree_iter iter; | ||
200 | pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
201 | |||
202 | if (!mapping) | ||
203 | return; | ||
204 | |||
205 | spin_lock(&mapping->i_mmap_lock); | ||
206 | |||
207 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) | ||
208 | remove_migration_pte(vma, old, new); | ||
209 | |||
210 | spin_unlock(&mapping->i_mmap_lock); | ||
211 | } | ||
212 | |||
213 | /* | ||
181 | * Must hold mmap_sem lock on at least one of the vmas containing | 214 | * Must hold mmap_sem lock on at least one of the vmas containing |
182 | * the page so that the anon_vma cannot vanish. | 215 | * the page so that the anon_vma cannot vanish. |
183 | */ | 216 | */ |
184 | static void remove_migration_ptes(struct page *old, struct page *new) | 217 | static void remove_anon_migration_ptes(struct page *old, struct page *new) |
185 | { | 218 | { |
186 | struct anon_vma *anon_vma; | 219 | struct anon_vma *anon_vma; |
187 | struct vm_area_struct *vma; | 220 | struct vm_area_struct *vma; |
@@ -199,13 +232,24 @@ static void remove_migration_ptes(struct page *old, struct page *new) | |||
199 | spin_lock(&anon_vma->lock); | 232 | spin_lock(&anon_vma->lock); |
200 | 233 | ||
201 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) | 234 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) |
202 | remove_migration_pte(vma, page_address_in_vma(new, vma), | 235 | remove_migration_pte(vma, old, new); |
203 | old, new); | ||
204 | 236 | ||
205 | spin_unlock(&anon_vma->lock); | 237 | spin_unlock(&anon_vma->lock); |
206 | } | 238 | } |
207 | 239 | ||
208 | /* | 240 | /* |
241 | * Get rid of all migration entries and replace them by | ||
242 | * references to the indicated page. | ||
243 | */ | ||
244 | static void remove_migration_ptes(struct page *old, struct page *new) | ||
245 | { | ||
246 | if (PageAnon(new)) | ||
247 | remove_anon_migration_ptes(old, new); | ||
248 | else | ||
249 | remove_file_migration_ptes(old, new); | ||
250 | } | ||
251 | |||
252 | /* | ||
209 | * Something used the pte of a page under migration. We need to | 253 | * Something used the pte of a page under migration. We need to |
210 | * get to the page and wait until migration is finished. | 254 | * get to the page and wait until migration is finished. |
211 | * When we return from this function the fault will be retried. | 255 | * When we return from this function the fault will be retried. |
@@ -424,30 +468,59 @@ int buffer_migrate_page(struct address_space *mapping, | |||
424 | } | 468 | } |
425 | EXPORT_SYMBOL(buffer_migrate_page); | 469 | EXPORT_SYMBOL(buffer_migrate_page); |
426 | 470 | ||
427 | static int fallback_migrate_page(struct address_space *mapping, | 471 | /* |
428 | struct page *newpage, struct page *page) | 472 | * Writeback a page to clean the dirty state |
473 | */ | ||
474 | static int writeout(struct address_space *mapping, struct page *page) | ||
429 | { | 475 | { |
476 | struct writeback_control wbc = { | ||
477 | .sync_mode = WB_SYNC_NONE, | ||
478 | .nr_to_write = 1, | ||
479 | .range_start = 0, | ||
480 | .range_end = LLONG_MAX, | ||
481 | .nonblocking = 1, | ||
482 | .for_reclaim = 1 | ||
483 | }; | ||
484 | int rc; | ||
485 | |||
486 | if (!mapping->a_ops->writepage) | ||
487 | /* No write method for the address space */ | ||
488 | return -EINVAL; | ||
489 | |||
490 | if (!clear_page_dirty_for_io(page)) | ||
491 | /* Someone else already triggered a write */ | ||
492 | return -EAGAIN; | ||
493 | |||
430 | /* | 494 | /* |
431 | * Default handling if a filesystem does not provide | 495 | * A dirty page may imply that the underlying filesystem has |
432 | * a migration function. We can only migrate clean | 496 | * the page on some queue. So the page must be clean for |
433 | * pages so try to write out any dirty pages first. | 497 | * migration. Writeout may mean we loose the lock and the |
498 | * page state is no longer what we checked for earlier. | ||
499 | * At this point we know that the migration attempt cannot | ||
500 | * be successful. | ||
434 | */ | 501 | */ |
435 | if (PageDirty(page)) { | 502 | remove_migration_ptes(page, page); |
436 | switch (pageout(page, mapping)) { | ||
437 | case PAGE_KEEP: | ||
438 | case PAGE_ACTIVATE: | ||
439 | return -EAGAIN; | ||
440 | 503 | ||
441 | case PAGE_SUCCESS: | 504 | rc = mapping->a_ops->writepage(page, &wbc); |
442 | /* Relock since we lost the lock */ | 505 | if (rc < 0) |
443 | lock_page(page); | 506 | /* I/O Error writing */ |
444 | /* Must retry since page state may have changed */ | 507 | return -EIO; |
445 | return -EAGAIN; | ||
446 | 508 | ||
447 | case PAGE_CLEAN: | 509 | if (rc != AOP_WRITEPAGE_ACTIVATE) |
448 | ; /* try to migrate the page below */ | 510 | /* unlocked. Relock */ |
449 | } | 511 | lock_page(page); |
450 | } | 512 | |
513 | return -EAGAIN; | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * Default handling if a filesystem does not provide a migration function. | ||
518 | */ | ||
519 | static int fallback_migrate_page(struct address_space *mapping, | ||
520 | struct page *newpage, struct page *page) | ||
521 | { | ||
522 | if (PageDirty(page)) | ||
523 | return writeout(mapping, page); | ||
451 | 524 | ||
452 | /* | 525 | /* |
453 | * Buffers may be managed in a filesystem specific way. | 526 | * Buffers may be managed in a filesystem specific way. |
@@ -596,6 +596,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
596 | spin_unlock(&mmlist_lock); | 596 | spin_unlock(&mmlist_lock); |
597 | } | 597 | } |
598 | dec_mm_counter(mm, anon_rss); | 598 | dec_mm_counter(mm, anon_rss); |
599 | #ifdef CONFIG_MIGRATION | ||
599 | } else { | 600 | } else { |
600 | /* | 601 | /* |
601 | * Store the pfn of the page in a special migration | 602 | * Store the pfn of the page in a special migration |
@@ -604,12 +605,22 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
604 | */ | 605 | */ |
605 | BUG_ON(!migration); | 606 | BUG_ON(!migration); |
606 | entry = make_migration_entry(page, pte_write(pteval)); | 607 | entry = make_migration_entry(page, pte_write(pteval)); |
608 | #endif | ||
607 | } | 609 | } |
608 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); | 610 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); |
609 | BUG_ON(pte_file(*pte)); | 611 | BUG_ON(pte_file(*pte)); |
610 | } else | 612 | } else |
613 | #ifdef CONFIG_MIGRATION | ||
614 | if (migration) { | ||
615 | /* Establish migration entry for a file page */ | ||
616 | swp_entry_t entry; | ||
617 | entry = make_migration_entry(page, pte_write(pteval)); | ||
618 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); | ||
619 | } else | ||
620 | #endif | ||
611 | dec_mm_counter(mm, file_rss); | 621 | dec_mm_counter(mm, file_rss); |
612 | 622 | ||
623 | |||
613 | page_remove_rmap(page); | 624 | page_remove_rmap(page); |
614 | page_cache_release(page); | 625 | page_cache_release(page); |
615 | 626 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index bc5d4f43036c..71a02e295037 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -290,11 +290,23 @@ static void handle_write_error(struct address_space *mapping, | |||
290 | unlock_page(page); | 290 | unlock_page(page); |
291 | } | 291 | } |
292 | 292 | ||
293 | /* possible outcome of pageout() */ | ||
294 | typedef enum { | ||
295 | /* failed to write page out, page is locked */ | ||
296 | PAGE_KEEP, | ||
297 | /* move page to the active list, page is locked */ | ||
298 | PAGE_ACTIVATE, | ||
299 | /* page has been sent to the disk successfully, page is unlocked */ | ||
300 | PAGE_SUCCESS, | ||
301 | /* page is clean and locked */ | ||
302 | PAGE_CLEAN, | ||
303 | } pageout_t; | ||
304 | |||
293 | /* | 305 | /* |
294 | * pageout is called by shrink_page_list() for each dirty page. | 306 | * pageout is called by shrink_page_list() for each dirty page. |
295 | * Calls ->writepage(). | 307 | * Calls ->writepage(). |
296 | */ | 308 | */ |
297 | pageout_t pageout(struct page *page, struct address_space *mapping) | 309 | static pageout_t pageout(struct page *page, struct address_space *mapping) |
298 | { | 310 | { |
299 | /* | 311 | /* |
300 | * If the page is dirty, only perform writeback if that write | 312 | * If the page is dirty, only perform writeback if that write |