 mm/mempolicy.c | 270
 1 file changed, 138 insertions(+), 132 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4b077ec6c005..7051fe450e96 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -183,55 +183,9 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	return policy;
 }
 
-/* Check if we are the only process mapping the page in question */
-static inline int single_mm_mapping(struct mm_struct *mm,
-		struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-	int rc = 1;
-
-	spin_lock(&mapping->i_mmap_lock);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		if (mm != vma->vm_mm) {
-			rc = 0;
-			goto out;
-		}
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		if (mm != vma->vm_mm) {
-			rc = 0;
-			goto out;
-		}
-out:
-	spin_unlock(&mapping->i_mmap_lock);
-	return rc;
-}
-
-/*
- * Add a page to be migrated to the pagelist
- */
-static void migrate_page_add(struct vm_area_struct *vma,
-	struct page *page, struct list_head *pagelist, unsigned long flags)
-{
-	/*
-	 * Avoid migrating a page that is shared by others and not writable.
-	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
-	    mapping_writably_mapped(page->mapping) ||
-	    single_mm_mapping(vma->vm_mm, page->mapping)) {
-		int rc = isolate_lru_page(page);
-
-		if (rc == 1)
-			list_add(&page->lru, pagelist);
-		/*
-		 * If the isolate attempt was not successful then we just
-		 * encountered an unswappable page. Something must be wrong.
-		 */
-		WARN_ON(rc == 0);
-	}
-}
-
 static void gather_stats(struct page *, void *);
+static void migrate_page_add(struct vm_area_struct *vma,
+	struct page *page, struct list_head *pagelist, unsigned long flags);
 
 /* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -440,90 +394,6 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
 	return mpol_check_policy(mode, nodes);
 }
 
-static int swap_pages(struct list_head *pagelist)
-{
-	LIST_HEAD(moved);
-	LIST_HEAD(failed);
-	int n;
-
-	n = migrate_pages(pagelist, NULL, &moved, &failed);
-	putback_lru_pages(&failed);
-	putback_lru_pages(&moved);
-
-	return n;
-}
-
-long do_mbind(unsigned long start, unsigned long len,
-		unsigned long mode, nodemask_t *nmask, unsigned long flags)
-{
-	struct vm_area_struct *vma;
-	struct mm_struct *mm = current->mm;
-	struct mempolicy *new;
-	unsigned long end;
-	int err;
-	LIST_HEAD(pagelist);
-
-	if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
-				      MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-	    || mode > MPOL_MAX)
-		return -EINVAL;
-	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
-		return -EPERM;
-
-	if (start & ~PAGE_MASK)
-		return -EINVAL;
-
-	if (mode == MPOL_DEFAULT)
-		flags &= ~MPOL_MF_STRICT;
-
-	len = (len + PAGE_SIZE - 1) & PAGE_MASK;
-	end = start + len;
-
-	if (end < start)
-		return -EINVAL;
-	if (end == start)
-		return 0;
-
-	if (mpol_check_policy(mode, nmask))
-		return -EINVAL;
-
-	new = mpol_new(mode, nmask);
-	if (IS_ERR(new))
-		return PTR_ERR(new);
-
-	/*
-	 * If we are using the default policy then operation
-	 * on discontinuous address spaces is okay after all
-	 */
-	if (!new)
-		flags |= MPOL_MF_DISCONTIG_OK;
-
-	PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
-			mode,nodes_addr(nodes)[0]);
-
-	down_write(&mm->mmap_sem);
-	vma = check_range(mm, start, end, nmask,
-			  flags | MPOL_MF_INVERT, &pagelist);
-
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
-		err = mbind_range(vma, start, end, new);
-		if (!list_empty(&pagelist))
-			nr_failed = swap_pages(&pagelist);
-
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
-			err = -EIO;
-	}
-	if (!list_empty(&pagelist))
-		putback_lru_pages(&pagelist);
-
-	up_write(&mm->mmap_sem);
-	mpol_free(new);
-	return err;
-}
-
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
@@ -644,6 +514,71 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 }
 
 /*
+ * page migration
+ */
+
+/* Check if we are the only process mapping the page in question */
+static inline int single_mm_mapping(struct mm_struct *mm,
+		struct address_space *mapping)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int rc = 1;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
+		if (mm != vma->vm_mm) {
+			rc = 0;
+			goto out;
+		}
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+		if (mm != vma->vm_mm) {
+			rc = 0;
+			goto out;
+		}
+out:
+	spin_unlock(&mapping->i_mmap_lock);
+	return rc;
+}
+
+/*
+ * Add a page to be migrated to the pagelist
+ */
+static void migrate_page_add(struct vm_area_struct *vma,
+	struct page *page, struct list_head *pagelist, unsigned long flags)
+{
+	/*
+	 * Avoid migrating a page that is shared by others and not writable.
+	 */
+	if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
+	    mapping_writably_mapped(page->mapping) ||
+	    single_mm_mapping(vma->vm_mm, page->mapping)) {
+		int rc = isolate_lru_page(page);
+
+		if (rc == 1)
+			list_add(&page->lru, pagelist);
+		/*
+		 * If the isolate attempt was not successful then we just
+		 * encountered an unswappable page. Something must be wrong.
+		 */
+		WARN_ON(rc == 0);
+	}
+}
+
+static int swap_pages(struct list_head *pagelist)
+{
+	LIST_HEAD(moved);
+	LIST_HEAD(failed);
+	int n;
+
+	n = migrate_pages(pagelist, NULL, &moved, &failed);
+	putback_lru_pages(&failed);
+	putback_lru_pages(&moved);
+
+	return n;
+}
+
+/*
  * For now migrate_pages simply swaps out the pages from nodes that are in
  * the source set but not in the target set. In the future, we would
  * want a function that moves pages between the two nodesets in such
@@ -673,6 +608,77 @@ int do_migrate_pages(struct mm_struct *mm,
 	return count;
 }
 
+long do_mbind(unsigned long start, unsigned long len,
+		unsigned long mode, nodemask_t *nmask, unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	struct mempolicy *new;
+	unsigned long end;
+	int err;
+	LIST_HEAD(pagelist);
+
+	if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+				      MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	    || mode > MPOL_MAX)
+		return -EINVAL;
+	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+
+	if (mode == MPOL_DEFAULT)
+		flags &= ~MPOL_MF_STRICT;
+
+	len = (len + PAGE_SIZE - 1) & PAGE_MASK;
+	end = start + len;
+
+	if (end < start)
+		return -EINVAL;
+	if (end == start)
+		return 0;
+
+	if (mpol_check_policy(mode, nmask))
+		return -EINVAL;
+
+	new = mpol_new(mode, nmask);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+
+	/*
+	 * If we are using the default policy then operation
+	 * on discontinuous address spaces is okay after all
+	 */
+	if (!new)
+		flags |= MPOL_MF_DISCONTIG_OK;
+
+	PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
+			mode,nodes_addr(nodes)[0]);
+
+	down_write(&mm->mmap_sem);
+	vma = check_range(mm, start, end, nmask,
+			  flags | MPOL_MF_INVERT, &pagelist);
+
+	err = PTR_ERR(vma);
+	if (!IS_ERR(vma)) {
+		int nr_failed = 0;
+
+		err = mbind_range(vma, start, end, new);
+		if (!list_empty(&pagelist))
+			nr_failed = swap_pages(&pagelist);
+
+		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+			err = -EIO;
+	}
+	if (!list_empty(&pagelist))
+		putback_lru_pages(&pagelist);
+
+	up_write(&mm->mmap_sem);
+	mpol_free(new);
+	return err;
+}
+
 /*
  * User space interface with variable sized bitmaps for nodelists.
  */
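
For readers who want to exercise the MPOL_MF_MOVE path that do_mbind() now services, here is a minimal user-space sketch. It is not part of the patch: it assumes a libnuma <numaif.h> that already exposes MPOL_MF_MOVE and MPOL_MF_STRICT, and node 0 is chosen purely for illustration.

/* Illustrative only -- not part of this patch. Build with -lnuma. */
#define _GNU_SOURCE
#include <numaif.h>		/* mbind() wrapper and MPOL_* constants (libnuma) */
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	size_t len = 16 * 4096;
	unsigned long nodemask = 1UL << 0;	/* target node 0 (illustrative) */
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0xaa, len);	/* fault the pages in before rebinding */

	/*
	 * MPOL_MF_MOVE asks the kernel to migrate pages mapped by this
	 * process; MPOL_MF_STRICT makes mbind() fail with EIO if some
	 * pages could not be moved (the nr_failed path in do_mbind()).
	 */
	if (mbind(p, len, MPOL_BIND, &nodemask, 8 * sizeof(nodemask),
		  MPOL_MF_MOVE | MPOL_MF_STRICT) != 0)
		perror("mbind");

	munmap(p, len);
	return 0;
}

With this version of the kernel side, misplaced pages are swapped out rather than moved directly, so MPOL_MF_STRICT reports EIO for any pages that could not be isolated, matching the nr_failed handling in do_mbind() above.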