aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author David Rientjes <rientjes@google.com> 2008-04-28 05:12:25 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-04-28 11:58:19 -0400
commit 028fec414d803117eb4b2ed12acb4dd5da65b32d (patch)
tree 427f37ea0331369c1babc55c424c4fd2ac3b39f5 /mm
parent a3b51e0142d1be156ac697eaadadd6cfbb7ba32b (diff)
mempolicy: support optional mode flags
With the evolution of mempolicies, it is necessary to support mempolicy mode flags that specify how the policy shall behave in certain circumstances. The most immediate need for mode flag support is to suppress remapping the nodemask of a policy at the time of rebind. Both the mempolicy mode and flags are passed by the user in the 'int policy' formal of either the set_mempolicy() or mbind() syscall. A new constant, MPOL_MODE_FLAGS, represents the union of legal optional flags that may be passed as part of this int. Mempolicies that include illegal flags as part of their policy are rejected as invalid. An additional member to struct mempolicy is added to support the mode flags: struct mempolicy { ... unsigned short policy; unsigned short flags; } The splitting of the 'int' actual passed by the user is done in sys_set_mempolicy() and sys_mbind() for their respective syscalls. This is done by intersecting the actual with MPOL_MODE_FLAGS, rejecting the syscall if there are additional flags, and storing it in the new 'flags' member of struct mempolicy. The intersection of the actual with ~MPOL_MODE_FLAGS is stored in the 'policy' member of the struct and all current users of pol->policy remain unchanged. The union of the policy mode and optional mode flags is passed back to the user in get_mempolicy(). This combination of mode and flags within the same actual does not break userspace code that relies on get_mempolicy(&policy, ...) and either switch (policy) { case MPOL_BIND: ... case MPOL_INTERLEAVE: ... }; statements or if (policy == MPOL_INTERLEAVE) { ... } statements. Such applications would need to use optional mode flags when calling set_mempolicy() or mbind() for these previously implemented statements to stop working. If an application does start using optional mode flags, it will need to mask the optional flags off the policy in switch and conditional statements that only test mode.
An additional member is also added to struct shmem_sb_info to store the optional mode flags. [hugh@veritas.com: shmem mpol: fix build warning] Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Cc: Andi Kleen <ak@suse.de> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/mempolicy.c 51
-rw-r--r-- mm/shmem.c 24
2 files changed, 47 insertions, 28 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1311dc4a3888..1f6ff9c1bbc3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -187,12 +187,13 @@ static int is_valid_nodemask(nodemask_t *nodemask)
187} 187}
188 188
189/* Create a new policy */ 189/* Create a new policy */
190static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes) 190static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
191 nodemask_t *nodes)
191{ 192{
192 struct mempolicy *policy; 193 struct mempolicy *policy;
193 194
194 pr_debug("setting mode %d nodes[0] %lx\n", 195 pr_debug("setting mode %d flags %d nodes[0] %lx\n",
195 mode, nodes ? nodes_addr(*nodes)[0] : -1); 196 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
196 197
197 if (mode == MPOL_DEFAULT) 198 if (mode == MPOL_DEFAULT)
198 return NULL; 199 return NULL;
@@ -224,6 +225,7 @@ static struct mempolicy *mpol_new(unsigned short mode, nodemask_t *nodes)
224 BUG(); 225 BUG();
225 } 226 }
226 policy->policy = mode; 227 policy->policy = mode;
228 policy->flags = flags;
227 policy->cpuset_mems_allowed = cpuset_mems_allowed(current); 229 policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
228 return policy; 230 return policy;
229} 231}
@@ -466,13 +468,14 @@ static void mpol_set_task_struct_flag(void)
466} 468}
467 469
468/* Set the process memory policy */ 470/* Set the process memory policy */
469static long do_set_mempolicy(unsigned short mode, nodemask_t *nodes) 471static long do_set_mempolicy(unsigned short mode, unsigned short flags,
472 nodemask_t *nodes)
470{ 473{
471 struct mempolicy *new; 474 struct mempolicy *new;
472 475
473 if (mpol_check_policy(mode, nodes)) 476 if (mpol_check_policy(mode, nodes))
474 return -EINVAL; 477 return -EINVAL;
475 new = mpol_new(mode, nodes); 478 new = mpol_new(mode, flags, nodes);
476 if (IS_ERR(new)) 479 if (IS_ERR(new))
477 return PTR_ERR(new); 480 return PTR_ERR(new);
478 mpol_free(current->mempolicy); 481 mpol_free(current->mempolicy);
@@ -573,7 +576,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
573 goto out; 576 goto out;
574 } 577 }
575 } else 578 } else
576 *policy = pol->policy; 579 *policy = pol->policy | pol->flags;
577 580
578 if (vma) { 581 if (vma) {
579 up_read(&current->mm->mmap_sem); 582 up_read(&current->mm->mmap_sem);
@@ -763,8 +766,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
763#endif 766#endif
764 767
765static long do_mbind(unsigned long start, unsigned long len, 768static long do_mbind(unsigned long start, unsigned long len,
766 unsigned short mode, nodemask_t *nmask, 769 unsigned short mode, unsigned short mode_flags,
767 unsigned long flags) 770 nodemask_t *nmask, unsigned long flags)
768{ 771{
769 struct vm_area_struct *vma; 772 struct vm_area_struct *vma;
770 struct mm_struct *mm = current->mm; 773 struct mm_struct *mm = current->mm;
@@ -796,7 +799,7 @@ static long do_mbind(unsigned long start, unsigned long len,
796 if (mpol_check_policy(mode, nmask)) 799 if (mpol_check_policy(mode, nmask))
797 return -EINVAL; 800 return -EINVAL;
798 801
799 new = mpol_new(mode, nmask); 802 new = mpol_new(mode, mode_flags, nmask);
800 if (IS_ERR(new)) 803 if (IS_ERR(new))
801 return PTR_ERR(new); 804 return PTR_ERR(new);
802 805
@@ -807,8 +810,9 @@ static long do_mbind(unsigned long start, unsigned long len,
807 if (!new) 810 if (!new)
808 flags |= MPOL_MF_DISCONTIG_OK; 811 flags |= MPOL_MF_DISCONTIG_OK;
809 812
810 pr_debug("mbind %lx-%lx mode:%d nodes:%lx\n", start, start + len, 813 pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
811 mode, nmask ? nodes_addr(*nmask)[0] : -1); 814 start, start + len, mode, mode_flags,
815 nmask ? nodes_addr(*nmask)[0] : -1);
812 816
813 down_write(&mm->mmap_sem); 817 down_write(&mm->mmap_sem);
814 vma = check_range(mm, start, end, nmask, 818 vma = check_range(mm, start, end, nmask,
@@ -907,13 +911,16 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
907{ 911{
908 nodemask_t nodes; 912 nodemask_t nodes;
909 int err; 913 int err;
914 unsigned short mode_flags;
910 915
916 mode_flags = mode & MPOL_MODE_FLAGS;
917 mode &= ~MPOL_MODE_FLAGS;
911 if (mode >= MPOL_MAX) 918 if (mode >= MPOL_MAX)
912 return -EINVAL; 919 return -EINVAL;
913 err = get_nodes(&nodes, nmask, maxnode); 920 err = get_nodes(&nodes, nmask, maxnode);
914 if (err) 921 if (err)
915 return err; 922 return err;
916 return do_mbind(start, len, mode, &nodes, flags); 923 return do_mbind(start, len, mode, mode_flags, &nodes, flags);
917} 924}
918 925
919/* Set the process memory policy */ 926/* Set the process memory policy */
@@ -922,13 +929,16 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
922{ 929{
923 int err; 930 int err;
924 nodemask_t nodes; 931 nodemask_t nodes;
932 unsigned short flags;
925 933
926 if (mode < 0 || mode >= MPOL_MAX) 934 flags = mode & MPOL_MODE_FLAGS;
935 mode &= ~MPOL_MODE_FLAGS;
936 if ((unsigned int)mode >= MPOL_MAX)
927 return -EINVAL; 937 return -EINVAL;
928 err = get_nodes(&nodes, nmask, maxnode); 938 err = get_nodes(&nodes, nmask, maxnode);
929 if (err) 939 if (err)
930 return err; 940 return err;
931 return do_set_mempolicy(mode, &nodes); 941 return do_set_mempolicy(mode, flags, &nodes);
932} 942}
933 943
934asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, 944asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
@@ -1641,7 +1651,7 @@ restart:
1641} 1651}
1642 1652
1643void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, 1653void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
1644 nodemask_t *policy_nodes) 1654 unsigned short flags, nodemask_t *policy_nodes)
1645{ 1655{
1646 info->root = RB_ROOT; 1656 info->root = RB_ROOT;
1647 spin_lock_init(&info->lock); 1657 spin_lock_init(&info->lock);
@@ -1650,7 +1660,7 @@ void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
1650 struct mempolicy *newpol; 1660 struct mempolicy *newpol;
1651 1661
1652 /* Falls back to MPOL_DEFAULT on any error */ 1662 /* Falls back to MPOL_DEFAULT on any error */
1653 newpol = mpol_new(policy, policy_nodes); 1663 newpol = mpol_new(policy, flags, policy_nodes);
1654 if (!IS_ERR(newpol)) { 1664 if (!IS_ERR(newpol)) {
1655 /* Create pseudo-vma that contains just the policy */ 1665 /* Create pseudo-vma that contains just the policy */
1656 struct vm_area_struct pvma; 1666 struct vm_area_struct pvma;
@@ -1671,9 +1681,10 @@ int mpol_set_shared_policy(struct shared_policy *info,
1671 struct sp_node *new = NULL; 1681 struct sp_node *new = NULL;
1672 unsigned long sz = vma_pages(vma); 1682 unsigned long sz = vma_pages(vma);
1673 1683
1674 pr_debug("set_shared_policy %lx sz %lu %d %lx\n", 1684 pr_debug("set_shared_policy %lx sz %lu %d %d %lx\n",
1675 vma->vm_pgoff, 1685 vma->vm_pgoff,
1676 sz, npol? npol->policy : -1, 1686 sz, npol ? npol->policy : -1,
1687 npol ? npol->flags : -1,
1677 npol ? nodes_addr(npol->v.nodes)[0] : -1); 1688 npol ? nodes_addr(npol->v.nodes)[0] : -1);
1678 1689
1679 if (npol) { 1690 if (npol) {
@@ -1746,14 +1757,14 @@ void __init numa_policy_init(void)
1746 if (unlikely(nodes_empty(interleave_nodes))) 1757 if (unlikely(nodes_empty(interleave_nodes)))
1747 node_set(prefer, interleave_nodes); 1758 node_set(prefer, interleave_nodes);
1748 1759
1749 if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes)) 1760 if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
1750 printk("numa_policy_init: interleaving failed\n"); 1761 printk("numa_policy_init: interleaving failed\n");
1751} 1762}
1752 1763
1753/* Reset policy of current process to default */ 1764/* Reset policy of current process to default */
1754void numa_default_policy(void) 1765void numa_default_policy(void)
1755{ 1766{
1756 do_set_mempolicy(MPOL_DEFAULT, NULL); 1767 do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
1757} 1768}
1758 1769
1759/* Migrate a policy to a different set of nodes */ 1770/* Migrate a policy to a different set of nodes */
diff --git a/mm/shmem.c b/mm/shmem.c
index d8ef7ba831a5..1ccf794fbe61 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1080,9 +1080,10 @@ redirty:
1080#ifdef CONFIG_NUMA 1080#ifdef CONFIG_NUMA
1081#ifdef CONFIG_TMPFS 1081#ifdef CONFIG_TMPFS
1082static int shmem_parse_mpol(char *value, unsigned short *policy, 1082static int shmem_parse_mpol(char *value, unsigned short *policy,
1083 nodemask_t *policy_nodes) 1083 unsigned short *mode_flags, nodemask_t *policy_nodes)
1084{ 1084{
1085 char *nodelist = strchr(value, ':'); 1085 char *nodelist = strchr(value, ':');
1086 char *flags = strchr(value, '=');
1086 int err = 1; 1087 int err = 1;
1087 1088
1088 if (nodelist) { 1089 if (nodelist) {
@@ -1093,6 +1094,8 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
1093 if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY])) 1094 if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
1094 goto out; 1095 goto out;
1095 } 1096 }
1097 if (flags)
1098 *flags++ = '\0';
1096 if (!strcmp(value, "default")) { 1099 if (!strcmp(value, "default")) {
1097 *policy = MPOL_DEFAULT; 1100 *policy = MPOL_DEFAULT;
1098 /* Don't allow a nodelist */ 1101 /* Don't allow a nodelist */
@@ -1122,6 +1125,8 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
1122 *policy_nodes = node_states[N_HIGH_MEMORY]; 1125 *policy_nodes = node_states[N_HIGH_MEMORY];
1123 err = 0; 1126 err = 0;
1124 } 1127 }
1128 if (flags) {
1129 }
1125out: 1130out:
1126 /* Restore string for error message */ 1131 /* Restore string for error message */
1127 if (nodelist) 1132 if (nodelist)
@@ -1130,7 +1135,7 @@ out:
1130} 1135}
1131 1136
1132static void shmem_show_mpol(struct seq_file *seq, unsigned short policy, 1137static void shmem_show_mpol(struct seq_file *seq, unsigned short policy,
1133 const nodemask_t policy_nodes) 1138 unsigned short flags, const nodemask_t policy_nodes)
1134{ 1139{
1135 char *policy_string; 1140 char *policy_string;
1136 1141
@@ -1199,13 +1204,13 @@ static struct page *shmem_alloc_page(gfp_t gfp,
1199#else /* !CONFIG_NUMA */ 1204#else /* !CONFIG_NUMA */
1200#ifdef CONFIG_TMPFS 1205#ifdef CONFIG_TMPFS
1201static inline int shmem_parse_mpol(char *value, unsigned short *policy, 1206static inline int shmem_parse_mpol(char *value, unsigned short *policy,
1202 nodemask_t *policy_nodes) 1207 unsigned short *mode_flags, nodemask_t *policy_nodes)
1203{ 1208{
1204 return 1; 1209 return 1;
1205} 1210}
1206 1211
1207static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy, 1212static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy,
1208 const nodemask_t policy_nodes) 1213 unsigned short flags, const nodemask_t policy_nodes)
1209{ 1214{
1210} 1215}
1211#endif /* CONFIG_TMPFS */ 1216#endif /* CONFIG_TMPFS */
@@ -1578,7 +1583,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1578 inode->i_op = &shmem_inode_operations; 1583 inode->i_op = &shmem_inode_operations;
1579 inode->i_fop = &shmem_file_operations; 1584 inode->i_fop = &shmem_file_operations;
1580 mpol_shared_policy_init(&info->policy, sbinfo->policy, 1585 mpol_shared_policy_init(&info->policy, sbinfo->policy,
1581 &sbinfo->policy_nodes); 1586 sbinfo->flags, &sbinfo->policy_nodes);
1582 break; 1587 break;
1583 case S_IFDIR: 1588 case S_IFDIR:
1584 inc_nlink(inode); 1589 inc_nlink(inode);
@@ -1592,7 +1597,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1592 * Must not load anything in the rbtree, 1597 * Must not load anything in the rbtree,
1593 * mpol_free_shared_policy will not be called. 1598 * mpol_free_shared_policy will not be called.
1594 */ 1599 */
1595 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 1600 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0,
1596 NULL); 1601 NULL);
1597 break; 1602 break;
1598 } 1603 }
@@ -2209,7 +2214,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
2209 goto bad_val; 2214 goto bad_val;
2210 } else if (!strcmp(this_char,"mpol")) { 2215 } else if (!strcmp(this_char,"mpol")) {
2211 if (shmem_parse_mpol(value, &sbinfo->policy, 2216 if (shmem_parse_mpol(value, &sbinfo->policy,
2212 &sbinfo->policy_nodes)) 2217 &sbinfo->flags, &sbinfo->policy_nodes))
2213 goto bad_val; 2218 goto bad_val;
2214 } else { 2219 } else {
2215 printk(KERN_ERR "tmpfs: Bad mount option %s\n", 2220 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2261,6 +2266,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2261 sbinfo->max_inodes = config.max_inodes; 2266 sbinfo->max_inodes = config.max_inodes;
2262 sbinfo->free_inodes = config.max_inodes - inodes; 2267 sbinfo->free_inodes = config.max_inodes - inodes;
2263 sbinfo->policy = config.policy; 2268 sbinfo->policy = config.policy;
2269 sbinfo->flags = config.flags;
2264 sbinfo->policy_nodes = config.policy_nodes; 2270 sbinfo->policy_nodes = config.policy_nodes;
2265out: 2271out:
2266 spin_unlock(&sbinfo->stat_lock); 2272 spin_unlock(&sbinfo->stat_lock);
@@ -2282,7 +2288,8 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
2282 seq_printf(seq, ",uid=%u", sbinfo->uid); 2288 seq_printf(seq, ",uid=%u", sbinfo->uid);
2283 if (sbinfo->gid != 0) 2289 if (sbinfo->gid != 0)
2284 seq_printf(seq, ",gid=%u", sbinfo->gid); 2290 seq_printf(seq, ",gid=%u", sbinfo->gid);
2285 shmem_show_mpol(seq, sbinfo->policy, sbinfo->policy_nodes); 2291 shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags,
2292 sbinfo->policy_nodes);
2286 return 0; 2293 return 0;
2287} 2294}
2288#endif /* CONFIG_TMPFS */ 2295#endif /* CONFIG_TMPFS */
@@ -2313,6 +2320,7 @@ static int shmem_fill_super(struct super_block *sb,
2313 sbinfo->uid = current->fsuid; 2320 sbinfo->uid = current->fsuid;
2314 sbinfo->gid = current->fsgid; 2321 sbinfo->gid = current->fsgid;
2315 sbinfo->policy = MPOL_DEFAULT; 2322 sbinfo->policy = MPOL_DEFAULT;
2323 sbinfo->flags = 0;
2316 sbinfo->policy_nodes = node_states[N_HIGH_MEMORY]; 2324 sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
2317 sb->s_fs_info = sbinfo; 2325 sb->s_fs_info = sbinfo;
2318 2326