author		Nick Piggin <npiggin@suse.de>		2006-01-18 20:42:27 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-18 22:20:17 -0500
commit		053837fce7aa79025ed57656855df09f80175527
tree		05d7615894131a368fc4943f641b11acdd2ae694
parent		e236a166b2bc437769a9b8b5d19186a3761bde48
[PATCH] mm: migration page refcounting fix
Migration code currently does not take a reference to the target page
properly, so between unlocking the pte and trying to take a new
reference to the page with isolate_lru_page, anything could happen to
it.
Fix this by holding the pte lock until we get a chance to elevate the
refcount.
Other small cleanups while we're here.
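
In outline (a condensed sketch drawn from the mm/mempolicy.c and
mm/vmscan.c hunks below, not standalone code), the race and the fix
look like:

	/* Before: check_pte_range() dropped the pte lock around this
	 * call, so nothing pinned the page before isolate_lru_page()
	 * could take its reference. */
	spin_unlock(ptl);
	migrate_page_add(vma, page, private, flags); /* page may be freed here */
	spin_lock(ptl);

	/* After: the pte lock stays held across the call, and
	 * isolate_lru_page() now elevates the refcount itself, under
	 * zone->lru_lock, via TestClearPageLRU() + get_page(). */
	migrate_page_add(vma, page, private, flags);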
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	include/linux/mm_inline.h	| 21
-rw-r--r--	include/linux/swap.h		|  1
-rw-r--r--	mm/filemap.c			|  1
-rw-r--r--	mm/mempolicy.c			| 29
-rw-r--r--	mm/rmap.c			|  2
-rw-r--r--	mm/swap.c			| 26
-rw-r--r--	mm/vmscan.c			| 71
7 files changed, 76 insertions, 75 deletions
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 49cc68af01f8..8ac854f7f190 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
 	}
 }
 
-/*
- * Isolate one page from the LRU lists.
- *
- * - zone->lru_lock must be held
- */
-static inline int __isolate_lru_page(struct page *page)
-{
-	if (unlikely(!TestClearPageLRU(page)))
-		return 0;
-
-	if (get_page_testone(page)) {
-		/*
-		 * It is being freed elsewhere
-		 */
-		__put_page(page);
-		SetPageLRU(page);
-		return -ENOENT;
-	}
-
-	return 1;
-}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e92054d6530b..d01f7efb0f2c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
+extern int lru_add_drain_all(void);
 extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a965b6b35f26..44da3d476994 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 *    ->private_lock		(try_to_unmap_one)
 *    ->tree_lock		(try_to_unmap_one)
 *    ->zone.lru_lock		(follow_page->mark_page_accessed)
+*    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->tree_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode_lock		(page_remove_rmap->set_page_dirty)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3171f884d245..551cde40520b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		page = vm_normal_page(vma, addr, *pte);
 		if (!page)
 			continue;
+		/*
+		 * The check for PageReserved here is important to avoid
+		 * handling zero pages and other pages that may have been
+		 * marked special by the system.
+		 *
+		 * If the PageReserved would not be checked here then f.e.
+		 * the location of the zero page could have an influence
+		 * on MPOL_MF_STRICT, zero pages would be counted for
+		 * the per node stats, and there would be useless attempts
+		 * to put zero pages on the migration list.
+		 */
 		if (PageReserved(page))
 			continue;
 		nid = page_to_nid(page);
@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 		if (flags & MPOL_MF_STATS)
 			gather_stats(page, private);
-		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			spin_unlock(ptl);
+		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 			migrate_page_add(vma, page, private, flags);
-			spin_lock(ptl);
-		}
 		else
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
 
+	/* Clear the LRU lists so pages can be isolated */
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+		lru_add_drain_all();
+
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
@@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma,
 	if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
 	    mapping_writably_mapped(page->mapping) ||
 	    single_mm_mapping(vma->vm_mm, page->mapping)) {
-		int rc = isolate_lru_page(page);
-
-		if (rc == 1)
+		if (isolate_lru_page(page))
 			list_add(&page->lru, pagelist);
-		/*
-		 * If the isolate attempt was not successful then we just
-		 * encountered an unswappable page. Something must be wrong.
-		 */
-		WARN_ON(rc == 0);
 	}
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -33,7 +33,7 @@
 *   mapping->i_mmap_lock
 *     anon_vma->lock
 *       mm->page_table_lock or pte_lock
-*         zone->lru_lock (in mark_page_accessed)
+*         zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 *         swap_lock (in swap_duplicate, swap_info_get)
 *           mmlist_lock (in mmput, drain_mmlist and others)
 *           mapping->private_lock (in __set_page_dirty_buffers)
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -174,6 +174,32 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
+#ifdef CONFIG_NUMA
+static void lru_add_drain_per_cpu(void *dummy)
+{
+	lru_add_drain();
+}
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
+}
+
+#else
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+	lru_add_drain();
+	return 0;
+}
+#endif
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs. But it gets used by networking.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bf903b2d198f..827bf674577a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page)
 }
 
 /*
- * Add isolated pages on the list back to the LRU
+ * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
@@ -760,46 +760,33 @@ next:
 	return nr_failed + retry;
 }
 
-static void lru_add_drain_per_cpu(void *dummy)
-{
-	lru_add_drain();
-}
-
 /*
 * Isolate one page from the LRU lists and put it on the
-* indicated list. Do necessary cache draining if the
-* page is not on the LRU lists yet.
+* indicated list with elevated refcount.
 *
 * Result:
 *	0 = page not on LRU list
 *	1 = page removed from LRU list and added to the specified list.
-*	-ENOENT = page is being freed elsewhere.
 */
 int isolate_lru_page(struct page *page)
 {
-	int rc = 0;
-	struct zone *zone = page_zone(page);
+	int ret = 0;
 
-redo:
-	spin_lock_irq(&zone->lru_lock);
-	rc = __isolate_lru_page(page);
-	if (rc == 1) {
-		if (PageActive(page))
-			del_page_from_active_list(zone, page);
-		else
-			del_page_from_inactive_list(zone, page);
-	}
-	spin_unlock_irq(&zone->lru_lock);
-	if (rc == 0) {
-		/*
-		 * Maybe this page is still waiting for a cpu to drain it
-		 * from one of the lru lists?
-		 */
-		rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
-		if (rc == 0 && PageLRU(page))
-			goto redo;
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		if (TestClearPageLRU(page)) {
+			ret = 1;
+			get_page(page);
+			if (PageActive(page))
+				del_page_from_active_list(zone, page);
+			else
+				del_page_from_inactive_list(zone, page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
 	}
-	return rc;
+
+	return ret;
 }
 #endif
 
@@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		switch (__isolate_lru_page(page)) {
-		case 1:
-			/* Succeeded to isolate page */
-			list_move(&page->lru, dst);
-			nr_taken++;
-			break;
-		case -ENOENT:
-			/* Not possible to isolate */
-			list_move(&page->lru, src);
-			break;
-		default:
+		if (!TestClearPageLRU(page))
 			BUG();
+		list_del(&page->lru);
+		if (get_page_testone(page)) {
+			/*
+			 * It is being freed elsewhere
+			 */
+			__put_page(page);
+			SetPageLRU(page);
+			list_add(&page->lru, src);
+			continue;
+		} else {
+			list_add(&page->lru, dst);
+			nr_taken++;
 		}
 	}
 