Diffstat (limited to 'mm/mempolicy.c')
 -rw-r--r--  mm/mempolicy.c | 169
 1 file changed, 148 insertions(+), 21 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 73790188b0eb..3bd7fb7e4b75 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -95,6 +95,9 @@
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)	/* Invert check for nodemask */
 #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)	/* Gather statistics */
 
+/* The number of pages to migrate per call to migrate_pages() */
+#define MIGRATE_CHUNK_SIZE 256
+
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
 
@@ -543,24 +546,91 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	}
 }
 
-static int swap_pages(struct list_head *pagelist)
+/*
+ * Migrate the list 'pagelist' of pages to a certain destination.
+ *
+ * Specify destination with either non-NULL vma or dest_node >= 0
+ * Return the number of pages not migrated or error code
+ */
+static int migrate_pages_to(struct list_head *pagelist,
+			struct vm_area_struct *vma, int dest)
 {
+	LIST_HEAD(newlist);
 	LIST_HEAD(moved);
 	LIST_HEAD(failed);
-	int n;
+	int err = 0;
+	int nr_pages;
+	struct page *page;
+	struct list_head *p;
 
-	n = migrate_pages(pagelist, NULL, &moved, &failed);
-	putback_lru_pages(&failed);
-	putback_lru_pages(&moved);
+redo:
+	nr_pages = 0;
+	list_for_each(p, pagelist) {
+		if (vma)
+			page = alloc_page_vma(GFP_HIGHUSER, vma, vma->vm_start);
+		else
+			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
 
-	return n;
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+		list_add(&page->lru, &newlist);
+		nr_pages++;
+		if (nr_pages > MIGRATE_CHUNK_SIZE);
+			break;
+	}
+	err = migrate_pages(pagelist, &newlist, &moved, &failed);
+
+	putback_lru_pages(&moved);	/* Call release pages instead ?? */
+
+	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
+		goto redo;
+out:
+	/* Return leftover allocated pages */
+	while (!list_empty(&newlist)) {
+		page = list_entry(newlist.next, struct page, lru);
+		list_del(&page->lru);
+		__free_page(page);
+	}
+	list_splice(&failed, pagelist);
+	if (err < 0)
+		return err;
+
+	/* Calculate number of leftover pages */
+	nr_pages = 0;
+	list_for_each(p, pagelist)
+		nr_pages++;
+	return nr_pages;
 }
 
 /*
- * For now migrate_pages simply swaps out the pages from nodes that are in
- * the source set but not in the target set. In the future, we would
- * want a function that moves pages between the two nodesets in such
- * a way as to preserve the physical layout as much as possible.
+ * Migrate pages from one node to a target node.
+ * Returns error or the number of pages not migrated.
+ */
+int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
+{
+	nodemask_t nmask;
+	LIST_HEAD(pagelist);
+	int err = 0;
+
+	nodes_clear(nmask);
+	node_set(source, nmask);
+
+	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask,
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+
+	if (!list_empty(&pagelist)) {
+		err = migrate_pages_to(&pagelist, NULL, dest);
+		if (!list_empty(&pagelist))
+			putback_lru_pages(&pagelist);
+	}
+	return err;
+}
+
+/*
+ * Move pages between the two nodesets so as to preserve the physical
+ * layout as much as possible.
  *
 * Returns the number of page that could not be moved.
 */
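One practical consequence of the MIGRATE_CHUNK_SIZE chunking above: migrate_pages_to() pre-allocates at most MIGRATE_CHUNK_SIZE destination pages per pass and loops back to the redo label until the source list is drained (or an error occurs), so the transient allocation per pass is bounded at roughly 256 pages, i.e. 256 × 4 KiB = 1 MiB assuming 4 KiB base pages (proportionally more on architectures with larger pages).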
@@ -568,22 +638,76 @@ int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
 {
 	LIST_HEAD(pagelist);
-	int count = 0;
-	nodemask_t nodes;
+	int busy = 0;
+	int err = 0;
+	nodemask_t tmp;
 
-	nodes_andnot(nodes, *from_nodes, *to_nodes);
+	down_read(&mm->mmap_sem);
 
-	down_read(&mm->mmap_sem);
-	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+	/*
+	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
+	 * bit in 'to' is not also set in 'tmp'. Clear the found 'source'
+	 * bit in 'tmp', and return that <source, dest> pair for migration.
+	 * The pair of nodemasks 'to' and 'from' define the map.
+	 *
+	 * If no pair of bits is found that way, fallback to picking some
+	 * pair of 'source' and 'dest' bits that are not the same. If the
+	 * 'source' and 'dest' bits are the same, this represents a node
+	 * that will be migrating to itself, so no pages need move.
+	 *
+	 * If no bits are left in 'tmp', or if all remaining bits left
+	 * in 'tmp' correspond to the same bit in 'to', return false
+	 * (nothing left to migrate).
+	 *
+	 * This lets us pick a pair of nodes to migrate between, such that
+	 * if possible the dest node is not already occupied by some other
+	 * source node, minimizing the risk of overloading the memory on a
+	 * node that would happen if we migrated incoming memory to a node
+	 * before migrating outgoing memory source that same node.
+	 *
+	 * A single scan of tmp is sufficient. As we go, we remember the
+	 * most recent <s, d> pair that moved (s != d). If we find a pair
+	 * that not only moved, but what's better, moved to an empty slot
+	 * (d is not set in tmp), then we break out then, with that pair.
+	 * Otherwise when we finish scannng from_tmp, we at least have the
+	 * most recent <s, d> pair that moved. If we get all the way through
+	 * the scan of tmp without finding any node that moved, much less
+	 * moved to an empty node, then there is nothing left worth migrating.
+	 */
 
-	if (!list_empty(&pagelist)) {
-		count = swap_pages(&pagelist);
-		putback_lru_pages(&pagelist);
+	tmp = *from_nodes;
+	while (!nodes_empty(tmp)) {
+		int s,d;
+		int source = -1;
+		int dest = 0;
+
+		for_each_node_mask(s, tmp) {
+			d = node_remap(s, *from_nodes, *to_nodes);
+			if (s == d)
+				continue;
+
+			source = s;	/* Node moved. Memorize */
+			dest = d;
+
+			/* dest not in remaining from nodes? */
+			if (!node_isset(dest, tmp))
+				break;
+		}
+		if (source == -1)
+			break;
+
+		node_clear(source, tmp);
+		err = migrate_to_node(mm, source, dest, flags);
+		if (err > 0)
+			busy += err;
+		if (err < 0)
+			break;
 	}
 
 	up_read(&mm->mmap_sem);
-	return count;
+	if (err < 0)
+		return err;
+	return busy;
 }
 
 long do_mbind(unsigned long start, unsigned long len,
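The comment above describes the pair-selection pass in prose. As a hedged illustration only (a standalone userspace sketch, not kernel code: plain unsigned bitmasks stand in for nodemask_t, and remap() below is a hypothetical stand-in that mimics node_remap()'s mapping of the n-th set bit of 'from' onto the n-th set bit of 'to', wrapping around), the following program walks the same selection loop for from = {0,1}, to = {1,2}:

/*
 * Hedged userspace sketch of the <source, dest> selection in
 * do_migrate_pages().  Unsigned ints stand in for nodemask_t;
 * remap() mimics node_remap(): the n-th set bit of 'from' maps
 * to the n-th set bit of 'to', wrapping modulo the weight of 'to'.
 */
#include <stdio.h>

static int nth_set_bit(unsigned mask, int n)	/* n-th set bit, 0-based */
{
	for (int bit = 0; bit < 32; bit++)
		if ((mask & (1u << bit)) && n-- == 0)
			return bit;
	return -1;
}

static int remap(int s, unsigned from, unsigned to)
{
	if (!(from & (1u << s)) || !to)
		return s;			/* unmapped bits stay put */
	/* ordinal position of s among the set bits of 'from' */
	int n = __builtin_popcount(from & ((1u << s) - 1));
	return nth_set_bit(to, n % __builtin_popcount(to));
}

int main(void)
{
	unsigned from = 0x3;		/* migrate away from nodes 0 and 1 */
	unsigned to   = 0x6;		/* ...onto nodes 1 and 2           */
	unsigned tmp  = from;		/* sources still waiting to move   */

	while (tmp) {
		int source = -1, dest = 0;

		for (int s = 0; s < 32; s++) {
			if (!(tmp & (1u << s)))
				continue;
			int d = remap(s, from, to);
			if (s == d)
				continue;	/* node would move to itself */
			source = s;		/* remember the latest mover */
			dest = d;
			if (!(tmp & (1u << d)))
				break;		/* dest is not itself pending */
		}
		if (source == -1)
			break;			/* nothing left worth moving */
		tmp &= ~(1u << source);
		printf("migrate_to_node(mm, %d, %d, flags)\n", source, dest);
	}
	return 0;
}

It prints migrate_to_node(mm, 1, 2, flags) before migrate_to_node(mm, 0, 1, flags): node 1 is emptied into node 2 before node 0's pages land on node 1, which is exactly the "prefer an empty destination" ordering the comment argues for.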
@@ -643,8 +767,9 @@ long do_mbind(unsigned long start, unsigned long len,
 		int nr_failed = 0;
 
 		err = mbind_range(vma, start, end, new);
+
 		if (!list_empty(&pagelist))
-			nr_failed = swap_pages(&pagelist);
+			nr_failed = migrate_pages_to(&pagelist, vma, -1);
 
 		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
@@ -1034,6 +1159,7 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 		return interleave_nodes(pol);
 }
 
+#ifdef CONFIG_HUGETLBFS
 /* Return a zonelist suitable for a huge page allocation. */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 {
@@ -1047,6 +1173,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 	}
 	return zonelist_policy(GFP_HIGHUSER, pol);
 }
+#endif
 
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
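Finally, for a sense of how this code path is reached from userspace (an assumption-laden sketch, not part of the patch): do_mbind() services the mbind(2) system call, and with MPOL_MF_MOVE set it now calls migrate_pages_to() with the target vma rather than merely swapping the misplaced pages out. Assuming libnuma's <numaif.h> wrappers and a machine with at least two NUMA nodes, a minimal caller looks like this (build with -lnuma):

/*
 * Minimal, hedged sketch: bind an anonymous region to node 0 and ask the
 * kernel to move pages that are already resident elsewhere -- the
 * do_mbind()/MPOL_MF_MOVE path touched by the hunk above.  Assumes
 * libnuma's <numaif.h> and >= 2 NUMA nodes; error handling kept minimal.
 */
#include <numaif.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	size_t len = 64UL * 4096;	/* 64 pages, assuming 4 KiB pages */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0, len);		/* fault the pages in somewhere */

	unsigned long nodemask = 1UL << 0;	/* target: node 0 */
	if (mbind(p, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_MOVE | MPOL_MF_STRICT) != 0)
		perror("mbind");	/* EIO if some pages could not be moved */
	return 0;
}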
