diff options
author | David Rientjes <rientjes@google.com> | 2008-04-28 05:12:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:19 -0400 |
commit | 028fec414d803117eb4b2ed12acb4dd5da65b32d (patch) | |
tree | 427f37ea0331369c1babc55c424c4fd2ac3b39f5 /mm/mempolicy.c | |
parent | a3b51e0142d1be156ac697eaadadd6cfbb7ba32b (diff) |
mempolicy: support optional mode flags
With the evolution of mempolicies, it is necessary to support mempolicy mode
flags that specify how the policy shall behave in certain circumstances. The
most immediate need for mode flag support is to suppress remapping the
nodemask of a policy at the time of rebind.
Both the mempolicy mode and flags are passed by the user in the 'int policy'
formal parameter of either the set_mempolicy() or mbind() syscall. A new constant,
MPOL_MODE_FLAGS, represents the union of legal optional flags that may be
passed as part of this int. Mempolicies that include illegal flags as part of
their policy are rejected as invalid.
An additional member to struct mempolicy is added to support the mode flags:
struct mempolicy {
...
unsigned short policy;
unsigned short flags;
}
The splitting of the 'int' actual argument passed by the user is done in
sys_set_mempolicy() and sys_mbind() for their respective syscalls. This is
done by intersecting the actual with MPOL_MODE_FLAGS, rejecting the syscall if
there are additional flags, and storing it in the new 'flags' member of struct
mempolicy. The intersection of the actual with ~MPOL_MODE_FLAGS is stored in
the 'policy' member of the struct and all current users of pol->policy remain
unchanged.
The union of the policy mode and optional mode flags is passed back to the
user in get_mempolicy().
This combination of mode and flags within the same actual argument does not break
userspace code that relies on get_mempolicy(&policy, ...) and either
switch (policy) {
case MPOL_BIND:
...
case MPOL_INTERLEAVE:
...
};
statements or
if (policy == MPOL_INTERLEAVE) {
...
}
statements. Such applications would need to use optional mode flags when
calling set_mempolicy() or mbind() for these previously implemented statements
to stop working. If an application does start using optional mode flags, it
will need to mask the optional flags off the policy in switch and conditional
statements that only test mode.
An additional member is also added to struct shmem_sb_info to store the
optional mode flags.
[hugh@veritas.com: shmem mpol: fix build warning]
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 51 |
1 files changed, 31 insertions, 20 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1311dc4a3888..1f6ff9c1bbc3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -187,12 +187,13 @@ static int is_valid_nodemask(nodemask_t *nodemask) | |||
187 | } | 187 | } |
188 | 188 | ||
189 | /* Create a new policy */ | 189 | /* Create a new policy */ |
190 | static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes) | 190 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, |
191 | nodemask_t *nodes) | ||
191 | { | 192 | { |
192 | struct mempolicy *policy; | 193 | struct mempolicy *policy; |
193 | 194 | ||
194 | pr_debug("setting mode %d nodes[0] %lx\n", | 195 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", |
195 | mode, nodes ? nodes_addr(*nodes)[0] : -1); | 196 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); |
196 | 197 | ||
197 | if (mode == MPOL_DEFAULT) | 198 | if (mode == MPOL_DEFAULT) |
198 | return NULL; | 199 | return NULL; |
@@ -224,6 +225,7 @@ static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes) | |||
224 | BUG(); | 225 | BUG(); |
225 | } | 226 | } |
226 | policy->policy = mode; | 227 | policy->policy = mode; |
228 | policy->flags = flags; | ||
227 | policy->cpuset_mems_allowed = cpuset_mems_allowed(current); | 229 | policy->cpuset_mems_allowed = cpuset_mems_allowed(current); |
228 | return policy; | 230 | return policy; |
229 | } | 231 | } |
@@ -466,13 +468,14 @@ static void mpol_set_task_struct_flag(void) | |||
466 | } | 468 | } |
467 | 469 | ||
468 | /* Set the process memory policy */ | 470 | /* Set the process memory policy */ |
469 | static long do_set_mempolicy(unsigned short mode, nodemask_t *nodes) | 471 | static long do_set_mempolicy(unsigned short mode, unsigned short flags, |
472 | nodemask_t *nodes) | ||
470 | { | 473 | { |
471 | struct mempolicy *new; | 474 | struct mempolicy *new; |
472 | 475 | ||
473 | if (mpol_check_policy(mode, nodes)) | 476 | if (mpol_check_policy(mode, nodes)) |
474 | return -EINVAL; | 477 | return -EINVAL; |
475 | new = mpol_new(mode, nodes); | 478 | new = mpol_new(mode, flags, nodes); |
476 | if (IS_ERR(new)) | 479 | if (IS_ERR(new)) |
477 | return PTR_ERR(new); | 480 | return PTR_ERR(new); |
478 | mpol_free(current->mempolicy); | 481 | mpol_free(current->mempolicy); |
@@ -573,7 +576,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
573 | goto out; | 576 | goto out; |
574 | } | 577 | } |
575 | } else | 578 | } else |
576 | *policy = pol->policy; | 579 | *policy = pol->policy | pol->flags; |
577 | 580 | ||
578 | if (vma) { | 581 | if (vma) { |
579 | up_read(&current->mm->mmap_sem); | 582 | up_read(&current->mm->mmap_sem); |
@@ -763,8 +766,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * | |||
763 | #endif | 766 | #endif |
764 | 767 | ||
765 | static long do_mbind(unsigned long start, unsigned long len, | 768 | static long do_mbind(unsigned long start, unsigned long len, |
766 | unsigned short mode, nodemask_t *nmask, | 769 | unsigned short mode, unsigned short mode_flags, |
767 | unsigned long flags) | 770 | nodemask_t *nmask, unsigned long flags) |
768 | { | 771 | { |
769 | struct vm_area_struct *vma; | 772 | struct vm_area_struct *vma; |
770 | struct mm_struct *mm = current->mm; | 773 | struct mm_struct *mm = current->mm; |
@@ -796,7 +799,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
796 | if (mpol_check_policy(mode, nmask)) | 799 | if (mpol_check_policy(mode, nmask)) |
797 | return -EINVAL; | 800 | return -EINVAL; |
798 | 801 | ||
799 | new = mpol_new(mode, nmask); | 802 | new = mpol_new(mode, mode_flags, nmask); |
800 | if (IS_ERR(new)) | 803 | if (IS_ERR(new)) |
801 | return PTR_ERR(new); | 804 | return PTR_ERR(new); |
802 | 805 | ||
@@ -807,8 +810,9 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
807 | if (!new) | 810 | if (!new) |
808 | flags |= MPOL_MF_DISCONTIG_OK; | 811 | flags |= MPOL_MF_DISCONTIG_OK; |
809 | 812 | ||
810 | pr_debug("mbind %lx-%lx mode:%d nodes:%lx\n", start, start + len, | 813 | pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n", |
811 | mode, nmask ? nodes_addr(*nmask)[0] : -1); | 814 | start, start + len, mode, mode_flags, |
815 | nmask ? nodes_addr(*nmask)[0] : -1); | ||
812 | 816 | ||
813 | down_write(&mm->mmap_sem); | 817 | down_write(&mm->mmap_sem); |
814 | vma = check_range(mm, start, end, nmask, | 818 | vma = check_range(mm, start, end, nmask, |
@@ -907,13 +911,16 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len, | |||
907 | { | 911 | { |
908 | nodemask_t nodes; | 912 | nodemask_t nodes; |
909 | int err; | 913 | int err; |
914 | unsigned short mode_flags; | ||
910 | 915 | ||
916 | mode_flags = mode & MPOL_MODE_FLAGS; | ||
917 | mode &= ~MPOL_MODE_FLAGS; | ||
911 | if (mode >= MPOL_MAX) | 918 | if (mode >= MPOL_MAX) |
912 | return -EINVAL; | 919 | return -EINVAL; |
913 | err = get_nodes(&nodes, nmask, maxnode); | 920 | err = get_nodes(&nodes, nmask, maxnode); |
914 | if (err) | 921 | if (err) |
915 | return err; | 922 | return err; |
916 | return do_mbind(start, len, mode, &nodes, flags); | 923 | return do_mbind(start, len, mode, mode_flags, &nodes, flags); |
917 | } | 924 | } |
918 | 925 | ||
919 | /* Set the process memory policy */ | 926 | /* Set the process memory policy */ |
@@ -922,13 +929,16 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | |||
922 | { | 929 | { |
923 | int err; | 930 | int err; |
924 | nodemask_t nodes; | 931 | nodemask_t nodes; |
932 | unsigned short flags; | ||
925 | 933 | ||
926 | if (mode < 0 || mode >= MPOL_MAX) | 934 | flags = mode & MPOL_MODE_FLAGS; |
935 | mode &= ~MPOL_MODE_FLAGS; | ||
936 | if ((unsigned int)mode >= MPOL_MAX) | ||
927 | return -EINVAL; | 937 | return -EINVAL; |
928 | err = get_nodes(&nodes, nmask, maxnode); | 938 | err = get_nodes(&nodes, nmask, maxnode); |
929 | if (err) | 939 | if (err) |
930 | return err; | 940 | return err; |
931 | return do_set_mempolicy(mode, &nodes); | 941 | return do_set_mempolicy(mode, flags, &nodes); |
932 | } | 942 | } |
933 | 943 | ||
934 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | 944 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, |
@@ -1641,7 +1651,7 @@ restart: | |||
1641 | } | 1651 | } |
1642 | 1652 | ||
1643 | void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, | 1653 | void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, |
1644 | nodemask_t *policy_nodes) | 1654 | unsigned short flags, nodemask_t *policy_nodes) |
1645 | { | 1655 | { |
1646 | info->root = RB_ROOT; | 1656 | info->root = RB_ROOT; |
1647 | spin_lock_init(&info->lock); | 1657 | spin_lock_init(&info->lock); |
@@ -1650,7 +1660,7 @@ void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, | |||
1650 | struct mempolicy *newpol; | 1660 | struct mempolicy *newpol; |
1651 | 1661 | ||
1652 | /* Falls back to MPOL_DEFAULT on any error */ | 1662 | /* Falls back to MPOL_DEFAULT on any error */ |
1653 | newpol = mpol_new(policy, policy_nodes); | 1663 | newpol = mpol_new(policy, flags, policy_nodes); |
1654 | if (!IS_ERR(newpol)) { | 1664 | if (!IS_ERR(newpol)) { |
1655 | /* Create pseudo-vma that contains just the policy */ | 1665 | /* Create pseudo-vma that contains just the policy */ |
1656 | struct vm_area_struct pvma; | 1666 | struct vm_area_struct pvma; |
@@ -1671,9 +1681,10 @@ int mpol_set_shared_policy(struct shared_policy *info, | |||
1671 | struct sp_node *new = NULL; | 1681 | struct sp_node *new = NULL; |
1672 | unsigned long sz = vma_pages(vma); | 1682 | unsigned long sz = vma_pages(vma); |
1673 | 1683 | ||
1674 | pr_debug("set_shared_policy %lx sz %lu %d %lx\n", | 1684 | pr_debug("set_shared_policy %lx sz %lu %d %d %lx\n", |
1675 | vma->vm_pgoff, | 1685 | vma->vm_pgoff, |
1676 | sz, npol? npol->policy : -1, | 1686 | sz, npol ? npol->policy : -1, |
1687 | npol ? npol->flags : -1, | ||
1677 | npol ? nodes_addr(npol->v.nodes)[0] : -1); | 1688 | npol ? nodes_addr(npol->v.nodes)[0] : -1); |
1678 | 1689 | ||
1679 | if (npol) { | 1690 | if (npol) { |
@@ -1746,14 +1757,14 @@ void __init numa_policy_init(void) | |||
1746 | if (unlikely(nodes_empty(interleave_nodes))) | 1757 | if (unlikely(nodes_empty(interleave_nodes))) |
1747 | node_set(prefer, interleave_nodes); | 1758 | node_set(prefer, interleave_nodes); |
1748 | 1759 | ||
1749 | if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes)) | 1760 | if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) |
1750 | printk("numa_policy_init: interleaving failed\n"); | 1761 | printk("numa_policy_init: interleaving failed\n"); |
1751 | } | 1762 | } |
1752 | 1763 | ||
1753 | /* Reset policy of current process to default */ | 1764 | /* Reset policy of current process to default */ |
1754 | void numa_default_policy(void) | 1765 | void numa_default_policy(void) |
1755 | { | 1766 | { |
1756 | do_set_mempolicy(MPOL_DEFAULT, NULL); | 1767 | do_set_mempolicy(MPOL_DEFAULT, 0, NULL); |
1757 | } | 1768 | } |
1758 | 1769 | ||
1759 | /* Migrate a policy to a different set of nodes */ | 1770 | /* Migrate a policy to a different set of nodes */ |