author		Nick Piggin <npiggin@suse.de>	2006-01-18 20:42:27 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-18 22:20:17 -0500
commit		053837fce7aa79025ed57656855df09f80175527 (patch)
tree		05d7615894131a368fc4943f641b11acdd2ae694
parent		e236a166b2bc437769a9b8b5d19186a3761bde48 (diff)
[PATCH] mm: migration page refcounting fix
Migration code currently does not take a reference to the target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page(), anything could happen to it.

Fix this by holding the pte lock until we get a chance to elevate the refcount.

Other small cleanups while we're here.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	include/linux/mm_inline.h	21
-rw-r--r--	include/linux/swap.h	1
-rw-r--r--	mm/filemap.c	1
-rw-r--r--	mm/mempolicy.c	29
-rw-r--r--	mm/rmap.c	2
-rw-r--r--	mm/swap.c	26
-rw-r--r--	mm/vmscan.c	71
7 files changed, 76 insertions, 75 deletions
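The race the commit message describes can be pictured with a minimal sketch of the fixed PTE-walk path. This is an illustration only, not a compilable unit on its own: it assumes the 2.6.16-era helpers visible in this patch (vm_normal_page(), PageReserved(), isolate_lru_page(), list_add()), and the wrapper name queue_page_for_migration() is invented here purely for illustration.

/*
 * Illustrative sketch (hypothetical helper): with this patch the caller
 * keeps the pte lock held across migrate_page_add(), and
 * isolate_lru_page() elevates the page refcount with get_page() under
 * zone->lru_lock, so the page cannot be freed in the window between the
 * PTE walk and its isolation from the LRU.
 */
static void queue_page_for_migration(struct vm_area_struct *vma,
				     unsigned long addr, pte_t *pte,
				     struct list_head *pagelist)
{
	struct page *page = vm_normal_page(vma, addr, *pte);

	if (!page || PageReserved(page))
		return;

	/* The old code dropped the pte lock here; that window is now gone. */
	if (isolate_lru_page(page))	/* returns 1 with refcount elevated */
		list_add(&page->lru, pagelist);
}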
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 49cc68af01f8..8ac854f7f190 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
 	}
 }
 
-/*
- * Isolate one page from the LRU lists.
- *
- * - zone->lru_lock must be held
- */
-static inline int __isolate_lru_page(struct page *page)
-{
-	if (unlikely(!TestClearPageLRU(page)))
-		return 0;
-
-	if (get_page_testone(page)) {
-		/*
-		 * It is being freed elsewhere
-		 */
-		__put_page(page);
-		SetPageLRU(page);
-		return -ENOENT;
-	}
-
-	return 1;
-}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e92054d6530b..d01f7efb0f2c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
+extern int lru_add_drain_all(void);
 extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a965b6b35f26..44da3d476994 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
  *  ->private_lock		(try_to_unmap_one)
  *  ->tree_lock		(try_to_unmap_one)
  *  ->zone.lru_lock		(follow_page->mark_page_accessed)
+ *  ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *  ->private_lock		(page_remove_rmap->set_page_dirty)
  *  ->tree_lock		(page_remove_rmap->set_page_dirty)
  *  ->inode_lock		(page_remove_rmap->set_page_dirty)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3171f884d245..551cde40520b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		page = vm_normal_page(vma, addr, *pte);
 		if (!page)
 			continue;
+		/*
+		 * The check for PageReserved here is important to avoid
+		 * handling zero pages and other pages that may have been
+		 * marked special by the system.
+		 *
+		 * If the PageReserved would not be checked here then f.e.
+		 * the location of the zero page could have an influence
+		 * on MPOL_MF_STRICT, zero pages would be counted for
+		 * the per node stats, and there would be useless attempts
+		 * to put zero pages on the migration list.
+		 */
 		if (PageReserved(page))
 			continue;
 		nid = page_to_nid(page);
@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 		if (flags & MPOL_MF_STATS)
 			gather_stats(page, private);
-		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			spin_unlock(ptl);
+		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 			migrate_page_add(vma, page, private, flags);
-			spin_lock(ptl);
-		}
 		else
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
 
+	/* Clear the LRU lists so pages can be isolated */
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+		lru_add_drain_all();
+
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
@@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma,
 	if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
 	    mapping_writably_mapped(page->mapping) ||
 	    single_mm_mapping(vma->vm_mm, page->mapping)) {
-		int rc = isolate_lru_page(page);
-
-		if (rc == 1)
+		if (isolate_lru_page(page))
 			list_add(&page->lru, pagelist);
-		/*
-		 * If the isolate attempt was not successful then we just
-		 * encountered an unswappable page. Something must be wrong.
-		 */
-		WARN_ON(rc == 0);
 	}
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index dfbb89f99a15..d85a99d28c03 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -33,7 +33,7 @@
  *   mapping->i_mmap_lock
  *     anon_vma->lock
  *       mm->page_table_lock or pte_lock
- *         zone->lru_lock (in mark_page_accessed)
+ *         zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *         swap_lock (in swap_duplicate, swap_info_get)
  *           mmlist_lock (in mmput, drain_mmlist and others)
  *           mapping->private_lock (in __set_page_dirty_buffers)
diff --git a/mm/swap.c b/mm/swap.c
index cbb48e721ab9..bc2442a7b0ee 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -174,6 +174,32 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
+#ifdef CONFIG_NUMA
+static void lru_add_drain_per_cpu(void *dummy)
+{
+	lru_add_drain();
+}
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
+}
+
+#else
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+	lru_add_drain();
+	return 0;
+}
+#endif
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs. But it gets used by networking.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bf903b2d198f..827bf674577a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page)
 }
 
 /*
- * Add isolated pages on the list back to the LRU
+ * Add isolated pages on the list back to the LRU.
  *
  * returns the number of pages put back.
  */
@@ -760,46 +760,33 @@ next:
 	return nr_failed + retry;
 }
 
-static void lru_add_drain_per_cpu(void *dummy)
-{
-	lru_add_drain();
-}
-
 /*
  * Isolate one page from the LRU lists and put it on the
- * indicated list. Do necessary cache draining if the
- * page is not on the LRU lists yet.
+ * indicated list with elevated refcount.
  *
  * Result:
  *  0 = page not on LRU list
  *  1 = page removed from LRU list and added to the specified list.
- * -ENOENT = page is being freed elsewhere.
  */
 int isolate_lru_page(struct page *page)
 {
-	int rc = 0;
-	struct zone *zone = page_zone(page);
+	int ret = 0;
 
-redo:
-	spin_lock_irq(&zone->lru_lock);
-	rc = __isolate_lru_page(page);
-	if (rc == 1) {
-		if (PageActive(page))
-			del_page_from_active_list(zone, page);
-		else
-			del_page_from_inactive_list(zone, page);
-	}
-	spin_unlock_irq(&zone->lru_lock);
-	if (rc == 0) {
-		/*
-		 * Maybe this page is still waiting for a cpu to drain it
-		 * from one of the lru lists?
-		 */
-		rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
-		if (rc == 0 && PageLRU(page))
-			goto redo;
-	}
-	return rc;
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		if (TestClearPageLRU(page)) {
+			ret = 1;
+			get_page(page);
+			if (PageActive(page))
+				del_page_from_active_list(zone, page);
+			else
+				del_page_from_inactive_list(zone, page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+
+	return ret;
 }
 #endif
 
804#endif 791#endif
805 792
@@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		switch (__isolate_lru_page(page)) {
-		case 1:
-			/* Succeeded to isolate page */
-			list_move(&page->lru, dst);
-			nr_taken++;
-			break;
-		case -ENOENT:
-			/* Not possible to isolate */
-			list_move(&page->lru, src);
-			break;
-		default:
+		if (!TestClearPageLRU(page))
 			BUG();
+		list_del(&page->lru);
+		if (get_page_testone(page)) {
+			/*
+			 * It is being freed elsewhere
+			 */
+			__put_page(page);
+			SetPageLRU(page);
+			list_add(&page->lru, src);
+			continue;
+		} else {
+			list_add(&page->lru, dst);
+			nr_taken++;
 		}
 	}
 