Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 99
1 file changed, 51 insertions(+), 48 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3171f884d245..73790188b0eb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -185,8 +185,8 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 }
 
 static void gather_stats(struct page *, void *);
-static void migrate_page_add(struct vm_area_struct *vma,
-	struct page *page, struct list_head *pagelist, unsigned long flags);
+static void migrate_page_add(struct page *page, struct list_head *pagelist,
+				unsigned long flags);
 
 /* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		page = vm_normal_page(vma, addr, *pte);
 		if (!page)
 			continue;
+		/*
+		 * The check for PageReserved here is important to avoid
+		 * handling zero pages and other pages that may have been
+		 * marked special by the system.
+		 *
+		 * If PageReserved were not checked here then e.g. the
+		 * location of the zero page could have an influence on
+		 * MPOL_MF_STRICT, zero pages would be counted for the
+		 * per-node stats, and there would be useless attempts
+		 * to put zero pages on the migration list.
+		 */
 		if (PageReserved(page))
 			continue;
 		nid = page_to_nid(page);
@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 		if (flags & MPOL_MF_STATS)
 			gather_stats(page, private);
-		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			spin_unlock(ptl);
-			migrate_page_add(vma, page, private, flags);
-			spin_lock(ptl);
-		}
+		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+			migrate_page_add(page, private, flags);
 		else
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
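Because migrate_page_add() now only inspects the page and moves it onto a private list (see the rewritten helper further down), it is safe to call with the page-table lock held, which is why the spin_unlock(ptl)/spin_lock(ptl) pair around it can be dropped. For orientation, the scan loop ends up looking roughly like the sketch below; the locking frame and the elided nodemask check come from the surrounding function and are only sketched here, they are not changed by this diff.

	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page || PageReserved(page))
			continue;
		/* ... nodemask checks elided ... */
		if (flags & MPOL_MF_STATS)
			gather_stats(page, private);
		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
			migrate_page_add(page, private, flags);	/* pte lock stays held */
		else
			break;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);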
@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
 
+	/* Clear the LRU lists so pages can be isolated */
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+		lru_add_drain_all();
+
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
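isolate_lru_page() can only take pages that are actually on a zone LRU list, and recently faulted pages may still sit in per-CPU pagevecs, so the walk is preceded by a drain on all CPUs. The same two added lines again, with a more explicit comment added for this write-up (the expanded comment is not part of the patch):

	/*
	 * Flush the per-CPU LRU pagevecs on all CPUs so that recently
	 * added pages reach the zone LRU lists, where isolate_lru_page()
	 * can find and isolate them.
	 */
	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		lru_add_drain_all();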
@@ -519,51 +531,15 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
  * page migration
  */
 
-/* Check if we are the only process mapping the page in question */
-static inline int single_mm_mapping(struct mm_struct *mm,
-				struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-	int rc = 1;
-
-	spin_lock(&mapping->i_mmap_lock);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		if (mm != vma->vm_mm) {
-			rc = 0;
-			goto out;
-		}
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		if (mm != vma->vm_mm) {
-			rc = 0;
-			goto out;
-		}
-out:
-	spin_unlock(&mapping->i_mmap_lock);
-	return rc;
-}
-
-/*
- * Add a page to be migrated to the pagelist
- */
-static void migrate_page_add(struct vm_area_struct *vma,
-	struct page *page, struct list_head *pagelist, unsigned long flags)
+static void migrate_page_add(struct page *page, struct list_head *pagelist,
+				unsigned long flags)
 {
 	/*
-	 * Avoid migrating a page that is shared by others and not writable.
+	 * Avoid migrating a page that is shared with others.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
-	    mapping_writably_mapped(page->mapping) ||
-	    single_mm_mapping(vma->vm_mm, page->mapping)) {
-		int rc = isolate_lru_page(page);
-
-		if (rc == 1)
+	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+		if (isolate_lru_page(page))
 			list_add(&page->lru, pagelist);
-		/*
-		 * If the isolate attempt was not successful then we just
-		 * encountered an unswappable page. Something must be wrong.
-		 */
-		WARN_ON(rc == 0);
 	}
 }
 
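Pieced back together from the additions in this hunk, the replacement helper reads as follows. The block comments are annotations for this write-up rather than part of the patch; the note about isolate_lru_page() returning 1 on success is inferred from the old "if (rc == 1)" test removed above.

static void migrate_page_add(struct page *page, struct list_head *pagelist,
				unsigned long flags)
{
	/*
	 * Avoid migrating a page that is shared with others.
	 * Without MPOL_MF_MOVE_ALL only pages mapped by exactly one
	 * process (page_mapcount(page) == 1) are queued for migration.
	 */
	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
		/* isolate_lru_page() returns 1 if it took the page off its LRU */
		if (isolate_lru_page(page))
			list_add(&page->lru, pagelist);
	}
}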
@@ -1000,6 +976,33 @@ static unsigned interleave_nodes(struct mempolicy *policy)
 	return nid;
 }
 
+/*
+ * Depending on the memory policy provide a node from which to allocate the
+ * next slab entry.
+ */
+unsigned slab_node(struct mempolicy *policy)
+{
+	switch (policy->policy) {
+	case MPOL_INTERLEAVE:
+		return interleave_nodes(policy);
+
+	case MPOL_BIND:
+		/*
+		 * Follow bind policy behavior and start allocation at the
+		 * first node.
+		 */
+		return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+
+	case MPOL_PREFERRED:
+		if (policy->v.preferred_node >= 0)
+			return policy->v.preferred_node;
+		/* Fall through */
+
+	default:
+		return numa_node_id();
+	}
+}
+
 /* Do static interleaving for a VMA with known offset. */
 static unsigned offset_il_node(struct mempolicy *pol,
 		struct vm_area_struct *vma, unsigned long off)
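Nothing in mempolicy.c calls slab_node() yet; per its comment it exists so the slab allocator can ask the current policy where to place the next slab. A consumer might look something like the sketch below; the helper name policy_node_hint() and the NULL/interrupt checks are assumptions for illustration, not code from this patch or from mm/slab.c.

/*
 * Illustrative sketch only: how an allocator might ask the current
 * task's memory policy which node to allocate from next.  The helper
 * name and the interrupt/NULL checks are assumptions for this example.
 */
static int policy_node_hint(void)
{
	struct mempolicy *pol = current->mempolicy;

	/* No policy set, or not in process context: just stay local. */
	if (!pol || in_interrupt())
		return numa_node_id();

	return slab_node(pol);
}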