aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
author David Rientjes <rientjes@google.com> 2008-04-28 05:12:25 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-04-28 11:58:19 -0400
commit 028fec414d803117eb4b2ed12acb4dd5da65b32d (patch)
tree 427f37ea0331369c1babc55c424c4fd2ac3b39f5 /mm/mempolicy.c
parent a3b51e0142d1be156ac697eaadadd6cfbb7ba32b (diff)
mempolicy: support optional mode flags
With the evolution of mempolicies, it is necessary to support mempolicy mode flags that specify how the policy shall behave in certain circumstances. The most immediate need for mode flag support is to suppress remapping the nodemask of a policy at the time of rebind. Both the mempolicy mode and flags are passed by the user in the 'int policy' formal of either the set_mempolicy() or mbind() syscall. A new constant, MPOL_MODE_FLAGS, represents the union of legal optional flags that may be passed as part of this int. Mempolicies that include illegal flags as part of their policy are rejected as invalid. An additional member to struct mempolicy is added to support the mode flags: struct mempolicy { ... unsigned short policy; unsigned short flags; } The splitting of the 'int' actual passed by the user is done in sys_set_mempolicy() and sys_mbind() for their respective syscalls. This is done by intersecting the actual with MPOL_MODE_FLAGS, rejecting the syscall if there are additional flags, and storing it in the new 'flags' member of struct mempolicy. The intersection of the actual with ~MPOL_MODE_FLAGS is stored in the 'policy' member of the struct and all current users of pol->policy remain unchanged. The union of the policy mode and optional mode flags is passed back to the user in get_mempolicy(). This combination of mode and flags within the same actual does not break userspace code that relies on get_mempolicy(&policy, ...) and either switch (policy) { case MPOL_BIND: ... case MPOL_INTERLEAVE: ... }; statements or if (policy == MPOL_INTERLEAVE) { ... } statements. Such applications would need to use optional mode flags when calling set_mempolicy() or mbind() for these previously implemented statements to stop working. If an application does start using optional mode flags, it will need to mask the optional flags off the policy in switch and conditional statements that only test mode. 
An additional member is also added to struct shmem_sb_info to store the optional mode flags. [hugh@veritas.com: shmem mpol: fix build warning] Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Cc: Andi Kleen <ak@suse.de> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- mm/mempolicy.c 51
1 files changed, 31 insertions, 20 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1311dc4a3888..1f6ff9c1bbc3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -187,12 +187,13 @@ static int is_valid_nodemask(nodemask_t *nodemask)
187} 187}
188 188
189/* Create a new policy */ 189/* Create a new policy */
190static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes) 190static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
191 nodemask_t *nodes)
191{ 192{
192 struct mempolicy *policy; 193 struct mempolicy *policy;
193 194
194 pr_debug("setting mode %d nodes[0] %lx\n", 195 pr_debug("setting mode %d flags %d nodes[0] %lx\n",
195 mode, nodes ? nodes_addr(*nodes)[0] : -1); 196 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
196 197
197 if (mode == MPOL_DEFAULT) 198 if (mode == MPOL_DEFAULT)
198 return NULL; 199 return NULL;
@@ -224,6 +225,7 @@ static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes)
224 BUG(); 225 BUG();
225 } 226 }
226 policy->policy = mode; 227 policy->policy = mode;
228 policy->flags = flags;
227 policy->cpuset_mems_allowed = cpuset_mems_allowed(current); 229 policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
228 return policy; 230 return policy;
229} 231}
@@ -466,13 +468,14 @@ static void mpol_set_task_struct_flag(void)
466} 468}
467 469
468/* Set the process memory policy */ 470/* Set the process memory policy */
469static long do_set_mempolicy(unsigned short mode, nodemask_t *nodes) 471static long do_set_mempolicy(unsigned short mode, unsigned short flags,
472 nodemask_t *nodes)
470{ 473{
471 struct mempolicy *new; 474 struct mempolicy *new;
472 475
473 if (mpol_check_policy(mode, nodes)) 476 if (mpol_check_policy(mode, nodes))
474 return -EINVAL; 477 return -EINVAL;
475 new = mpol_new(mode, nodes); 478 new = mpol_new(mode, flags, nodes);
476 if (IS_ERR(new)) 479 if (IS_ERR(new))
477 return PTR_ERR(new); 480 return PTR_ERR(new);
478 mpol_free(current->mempolicy); 481 mpol_free(current->mempolicy);
@@ -573,7 +576,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
573 goto out; 576 goto out;
574 } 577 }
575 } else 578 } else
576 *policy = pol->policy; 579 *policy = pol->policy | pol->flags;
577 580
578 if (vma) { 581 if (vma) {
579 up_read(&current->mm->mmap_sem); 582 up_read(&current->mm->mmap_sem);
@@ -763,8 +766,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
763#endif 766#endif
764 767
765static long do_mbind(unsigned long start, unsigned long len, 768static long do_mbind(unsigned long start, unsigned long len,
766 unsigned short mode, nodemask_t *nmask, 769 unsigned short mode, unsigned short mode_flags,
767 unsigned long flags) 770 nodemask_t *nmask, unsigned long flags)
768{ 771{
769 struct vm_area_struct *vma; 772 struct vm_area_struct *vma;
770 struct mm_struct *mm = current->mm; 773 struct mm_struct *mm = current->mm;
@@ -796,7 +799,7 @@ static long do_mbind(unsigned long start, unsigned long len,
796 if (mpol_check_policy(mode, nmask)) 799 if (mpol_check_policy(mode, nmask))
797 return -EINVAL; 800 return -EINVAL;
798 801
799 new = mpol_new(mode, nmask); 802 new = mpol_new(mode, mode_flags, nmask);
800 if (IS_ERR(new)) 803 if (IS_ERR(new))
801 return PTR_ERR(new); 804 return PTR_ERR(new);
802 805
@@ -807,8 +810,9 @@ static long do_mbind(unsigned long start, unsigned long len,
807 if (!new) 810 if (!new)
808 flags |= MPOL_MF_DISCONTIG_OK; 811 flags |= MPOL_MF_DISCONTIG_OK;
809 812
810 pr_debug("mbind %lx-%lx mode:%d nodes:%lx\n", start, start + len, 813 pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
811 mode, nmask ? nodes_addr(*nmask)[0] : -1); 814 start, start + len, mode, mode_flags,
815 nmask ? nodes_addr(*nmask)[0] : -1);
812 816
813 down_write(&mm->mmap_sem); 817 down_write(&mm->mmap_sem);
814 vma = check_range(mm, start, end, nmask, 818 vma = check_range(mm, start, end, nmask,
@@ -907,13 +911,16 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
907{ 911{
908 nodemask_t nodes; 912 nodemask_t nodes;
909 int err; 913 int err;
914 unsigned short mode_flags;
910 915
916 mode_flags = mode & MPOL_MODE_FLAGS;
917 mode &= ~MPOL_MODE_FLAGS;
911 if (mode >= MPOL_MAX) 918 if (mode >= MPOL_MAX)
912 return -EINVAL; 919 return -EINVAL;
913 err = get_nodes(&nodes, nmask, maxnode); 920 err = get_nodes(&nodes, nmask, maxnode);
914 if (err) 921 if (err)
915 return err; 922 return err;
916 return do_mbind(start, len, mode, &nodes, flags); 923 return do_mbind(start, len, mode, mode_flags, &nodes, flags);
917} 924}
918 925
919/* Set the process memory policy */ 926/* Set the process memory policy */
@@ -922,13 +929,16 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
922{ 929{
923 int err; 930 int err;
924 nodemask_t nodes; 931 nodemask_t nodes;
932 unsigned short flags;
925 933
926 if (mode < 0 || mode >= MPOL_MAX) 934 flags = mode & MPOL_MODE_FLAGS;
935 mode &= ~MPOL_MODE_FLAGS;
936 if ((unsigned int)mode >= MPOL_MAX)
927 return -EINVAL; 937 return -EINVAL;
928 err = get_nodes(&nodes, nmask, maxnode); 938 err = get_nodes(&nodes, nmask, maxnode);
929 if (err) 939 if (err)
930 return err; 940 return err;
931 return do_set_mempolicy(mode, &nodes); 941 return do_set_mempolicy(mode, flags, &nodes);
932} 942}
933 943
934asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, 944asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
@@ -1641,7 +1651,7 @@ restart:
1641} 1651}
1642 1652
1643void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, 1653void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
1644 nodemask_t *policy_nodes) 1654 unsigned short flags, nodemask_t *policy_nodes)
1645{ 1655{
1646 info->root = RB_ROOT; 1656 info->root = RB_ROOT;
1647 spin_lock_init(&info->lock); 1657 spin_lock_init(&info->lock);
@@ -1650,7 +1660,7 @@ void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
1650 struct mempolicy *newpol; 1660 struct mempolicy *newpol;
1651 1661
1652 /* Falls back to MPOL_DEFAULT on any error */ 1662 /* Falls back to MPOL_DEFAULT on any error */
1653 newpol = mpol_new(policy, policy_nodes); 1663 newpol = mpol_new(policy, flags, policy_nodes);
1654 if (!IS_ERR(newpol)) { 1664 if (!IS_ERR(newpol)) {
1655 /* Create pseudo-vma that contains just the policy */ 1665 /* Create pseudo-vma that contains just the policy */
1656 struct vm_area_struct pvma; 1666 struct vm_area_struct pvma;
@@ -1671,9 +1681,10 @@ int mpol_set_shared_policy(struct shared_policy *info,
1671 struct sp_node *new = NULL; 1681 struct sp_node *new = NULL;
1672 unsigned long sz = vma_pages(vma); 1682 unsigned long sz = vma_pages(vma);
1673 1683
1674 pr_debug("set_shared_policy %lx sz %lu %d %lx\n", 1684 pr_debug("set_shared_policy %lx sz %lu %d %d %lx\n",
1675 vma->vm_pgoff, 1685 vma->vm_pgoff,
1676 sz, npol? npol->policy : -1, 1686 sz, npol ? npol->policy : -1,
1687 npol ? npol->flags : -1,
1677 npol ? nodes_addr(npol->v.nodes)[0] : -1); 1688 npol ? nodes_addr(npol->v.nodes)[0] : -1);
1678 1689
1679 if (npol) { 1690 if (npol) {
@@ -1746,14 +1757,14 @@ void __init numa_policy_init(void)
1746 if (unlikely(nodes_empty(interleave_nodes))) 1757 if (unlikely(nodes_empty(interleave_nodes)))
1747 node_set(prefer, interleave_nodes); 1758 node_set(prefer, interleave_nodes);
1748 1759
1749 if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes)) 1760 if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
1750 printk("numa_policy_init: interleaving failed\n"); 1761 printk("numa_policy_init: interleaving failed\n");
1751} 1762}
1752 1763
1753/* Reset policy of current process to default */ 1764/* Reset policy of current process to default */
1754void numa_default_policy(void) 1765void numa_default_policy(void)
1755{ 1766{
1756 do_set_mempolicy(MPOL_DEFAULT, NULL); 1767 do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
1757} 1768}
1758 1769
1759/* Migrate a policy to a different set of nodes */ 1770/* Migrate a policy to a different set of nodes */