aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
author: Lee Schermerhorn <lee.schermerhorn@hp.com> 2008-04-28 05:13:26 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-28 11:58:25 -0400
commit: 71fe804b6d56d6a7aed680e096901434cef6a2c3 (patch)
tree: 3dd437e09fe6ee57644c72c79e08c562d4bb6389 /mm/mempolicy.c
parent: 3f226aa1cbc006f9d90f22084f519ad2a1286cd8 (diff)
mempolicy: use struct mempolicy pointer in shmem_sb_info
This patch replaces the mempolicy mode, mode_flags, and nodemask in the shmem_sb_info struct with a struct mempolicy pointer, initialized to NULL. This removes dependency on the details of mempolicy from shmem.c and hugetlbfs inode.c and simplifies the interfaces. mpol_parse_str() in mempolicy.c is changed to return, via a pointer to a pointer arg, a struct mempolicy pointer on success. For MPOL_DEFAULT, the returned pointer is NULL. Further, mpol_parse_str() now takes a 'no_context' argument that causes the input nodemask to be stored in the w.user_nodemask of the created mempolicy for use when the mempolicy is installed in a tmpfs inode shared policy tree. At that time, any cpuset contextualization is applied to the original input nodemask. This preserves the previous behavior where the input nodemask was stored in the superblock. We can think of the returned mempolicy as "context free". Because mpol_parse_str() is now calling mpol_new(), we can remove from mpol_parse_str() the semantic checks that mpol_new() already performs. Add 'no_context' parameter to mpol_to_str() to specify that it should format the nodemask in w.user_nodemask for 'bind' and 'interleave' policies. Change mpol_shared_policy_init() to take a pointer to a "context free" struct mempolicy and to create a new, "contextualized" mempolicy using the mode, mode_flags and user_nodemask from the input mempolicy. Note: we know that the mempolicy passed to mpol_to_str() or mpol_shared_policy_init() from a tmpfs superblock is "context free". This is currently the only instance thereof. However, if we found more uses for this concept, and introduced any ambiguity as to whether a mempolicy was context free or not, we could add another internal mode flag to identify context free mempolicies. Then, we could remove the 'no_context' argument from mpol_to_str(). Added shmem_get_sbmpol() to return a reference counted superblock mempolicy, if one exists, to pass to mpol_shared_policy_init(). 
We must add the reference under the sb stat_lock to prevent races with replacement of the mpol by remount. This reference is removed in mpol_shared_policy_init(). [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: another build fix] [akpm@linux-foundation.org: yet another build fix] Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: David Rientjes <rientjes@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- mm/mempolicy.c 144
1 files changed, 89 insertions, 55 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6b751565eed1..a37a5034f63d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1828,27 +1828,35 @@ restart:
1828 return 0; 1828 return 0;
1829} 1829}
1830 1830
1831void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, 1831/**
1832 unsigned short flags, nodemask_t *policy_nodes) 1832 * mpol_shared_policy_init - initialize shared policy for inode
1833{ 1833 * @sp: pointer to inode shared policy
1834 info->root = RB_ROOT; 1834 * @mpol: struct mempolicy to install
1835 spin_lock_init(&info->lock); 1835 *
1836 1836 * Install non-NULL @mpol in inode's shared policy rb-tree.
1837 if (policy != MPOL_DEFAULT) { 1837 * On entry, the current task has a reference on a non-NULL @mpol.
1838 struct mempolicy *newpol; 1838 * This must be released on exit.
1839 1839 */
1840 /* Falls back to NULL policy [MPOL_DEFAULT] on any error */ 1840void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
1841 newpol = mpol_new(policy, flags, policy_nodes); 1841{
1842 if (!IS_ERR(newpol)) { 1842 sp->root = RB_ROOT; /* empty tree == default mempolicy */
1843 /* Create pseudo-vma that contains just the policy */ 1843 spin_lock_init(&sp->lock);
1844 struct vm_area_struct pvma; 1844
1845 1845 if (mpol) {
1846 memset(&pvma, 0, sizeof(struct vm_area_struct)); 1846 struct vm_area_struct pvma;
1847 /* Policy covers entire file */ 1847 struct mempolicy *new;
1848 pvma.vm_end = TASK_SIZE; 1848
1849 mpol_set_shared_policy(info, &pvma, newpol); 1849 /* contextualize the tmpfs mount point mempolicy */
1850 mpol_put(newpol); 1850 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
1851 } 1851 mpol_put(mpol); /* drop our ref on sb mpol */
1852 if (IS_ERR(new))
1853 return; /* no valid nodemask intersection */
1854
1855 /* Create pseudo-vma that contains just the policy */
1856 memset(&pvma, 0, sizeof(struct vm_area_struct));
1857 pvma.vm_end = TASK_SIZE; /* policy covers entire file */
1858 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
1859 mpol_put(new); /* drop initial ref */
1852 } 1860 }
1853} 1861}
1854 1862
@@ -1962,18 +1970,27 @@ static const char * const policy_types[] =
1962/** 1970/**
1963 * mpol_parse_str - parse string to mempolicy 1971 * mpol_parse_str - parse string to mempolicy
1964 * @str: string containing mempolicy to parse 1972 * @str: string containing mempolicy to parse
1965 * @mode: pointer to returned policy mode 1973 * @mpol: pointer to struct mempolicy pointer, returned on success.
1966 * @mode_flags: pointer to returned flags 1974 * @no_context: flag whether to "contextualize" the mempolicy
1967 * @policy_nodes: pointer to returned nodemask
1968 * 1975 *
1969 * Format of input: 1976 * Format of input:
1970 * <mode>[=<flags>][:<nodelist>] 1977 * <mode>[=<flags>][:<nodelist>]
1971 * 1978 *
1972 * Currently only used for tmpfs/shmem mount options 1979 * if @no_context is true, save the input nodemask in w.user_nodemask in
1980 * the returned mempolicy. This will be used to "clone" the mempolicy in
1981 * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
1982 * mount option. Note that if 'static' or 'relative' mode flags were
1983 * specified, the input nodemask will already have been saved. Saving
1984 * it again is redundant, but safe.
1985 *
1986 * On success, returns 0, else 1
1973 */ 1987 */
1974int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags, 1988int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
1975 nodemask_t *policy_nodes)
1976{ 1989{
1990 struct mempolicy *new = NULL;
1991 unsigned short uninitialized_var(mode);
1992 unsigned short uninitialized_var(mode_flags);
1993 nodemask_t nodes;
1977 char *nodelist = strchr(str, ':'); 1994 char *nodelist = strchr(str, ':');
1978 char *flags = strchr(str, '='); 1995 char *flags = strchr(str, '=');
1979 int i; 1996 int i;
@@ -1982,26 +1999,30 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
1982 if (nodelist) { 1999 if (nodelist) {
1983 /* NUL-terminate mode or flags string */ 2000 /* NUL-terminate mode or flags string */
1984 *nodelist++ = '\0'; 2001 *nodelist++ = '\0';
1985 if (nodelist_parse(nodelist, *policy_nodes)) 2002 if (nodelist_parse(nodelist, nodes))
1986 goto out; 2003 goto out;
1987 if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY])) 2004 if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
1988 goto out; 2005 goto out;
1989 } 2006 } else
2007 nodes_clear(nodes);
2008
1990 if (flags) 2009 if (flags)
1991 *flags++ = '\0'; /* terminate mode string */ 2010 *flags++ = '\0'; /* terminate mode string */
1992 2011
1993 for (i = 0; i <= MPOL_LOCAL; i++) { 2012 for (i = 0; i <= MPOL_LOCAL; i++) {
1994 if (!strcmp(str, policy_types[i])) { 2013 if (!strcmp(str, policy_types[i])) {
1995 *mode = i; 2014 mode = i;
1996 break; 2015 break;
1997 } 2016 }
1998 } 2017 }
1999 if (i > MPOL_LOCAL) 2018 if (i > MPOL_LOCAL)
2000 goto out; 2019 goto out;
2001 2020
2002 switch (*mode) { 2021 switch (mode) {
2003 case MPOL_PREFERRED: 2022 case MPOL_PREFERRED:
2004 /* Insist on a nodelist of one node only */ 2023 /*
2024 * Insist on a nodelist of one node only
2025 */
2005 if (nodelist) { 2026 if (nodelist) {
2006 char *rest = nodelist; 2027 char *rest = nodelist;
2007 while (isdigit(*rest)) 2028 while (isdigit(*rest))
@@ -2010,63 +2031,73 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
2010 err = 0; 2031 err = 0;
2011 } 2032 }
2012 break; 2033 break;
2013 case MPOL_BIND:
2014 /* Insist on a nodelist */
2015 if (nodelist)
2016 err = 0;
2017 break;
2018 case MPOL_INTERLEAVE: 2034 case MPOL_INTERLEAVE:
2019 /* 2035 /*
2020 * Default to online nodes with memory if no nodelist 2036 * Default to online nodes with memory if no nodelist
2021 */ 2037 */
2022 if (!nodelist) 2038 if (!nodelist)
2023 *policy_nodes = node_states[N_HIGH_MEMORY]; 2039 nodes = node_states[N_HIGH_MEMORY];
2024 err = 0; 2040 err = 0;
2025 break; 2041 break;
2026 default: 2042 case MPOL_LOCAL:
2027 /* 2043 /*
2028 * MPOL_DEFAULT or MPOL_LOCAL 2044 * Don't allow a nodelist; mpol_new() checks flags
2029 * Don't allow a nodelist nor flags
2030 */ 2045 */
2031 if (!nodelist && !flags) 2046 if (nodelist)
2032 err = 0;
2033 if (*mode == MPOL_DEFAULT)
2034 goto out; 2047 goto out;
2035 /* else MPOL_LOCAL */ 2048 mode = MPOL_PREFERRED;
2036 *mode = MPOL_PREFERRED;
2037 nodes_clear(*policy_nodes);
2038 break; 2049 break;
2050
2051 /*
2052 * case MPOL_BIND: mpol_new() enforces non-empty nodemask.
2053 * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
2054 */
2039 } 2055 }
2040 2056
2041 *mode_flags = 0; 2057 mode_flags = 0;
2042 if (flags) { 2058 if (flags) {
2043 /* 2059 /*
2044 * Currently, we only support two mutually exclusive 2060 * Currently, we only support two mutually exclusive
2045 * mode flags. 2061 * mode flags.
2046 */ 2062 */
2047 if (!strcmp(flags, "static")) 2063 if (!strcmp(flags, "static"))
2048 *mode_flags |= MPOL_F_STATIC_NODES; 2064 mode_flags |= MPOL_F_STATIC_NODES;
2049 else if (!strcmp(flags, "relative")) 2065 else if (!strcmp(flags, "relative"))
2050 *mode_flags |= MPOL_F_RELATIVE_NODES; 2066 mode_flags |= MPOL_F_RELATIVE_NODES;
2051 else 2067 else
2052 err = 1; 2068 err = 1;
2053 } 2069 }
2070
2071 new = mpol_new(mode, mode_flags, &nodes);
2072 if (IS_ERR(new))
2073 err = 1;
2074 else if (no_context)
2075 new->w.user_nodemask = nodes; /* save for contextualization */
2076
2054out: 2077out:
2055 /* Restore string for error message */ 2078 /* Restore string for error message */
2056 if (nodelist) 2079 if (nodelist)
2057 *--nodelist = ':'; 2080 *--nodelist = ':';
2058 if (flags) 2081 if (flags)
2059 *--flags = '='; 2082 *--flags = '=';
2083 if (!err)
2084 *mpol = new;
2060 return err; 2085 return err;
2061} 2086}
2062#endif /* CONFIG_TMPFS */ 2087#endif /* CONFIG_TMPFS */
2063 2088
2064/* 2089/**
2090 * mpol_to_str - format a mempolicy structure for printing
2091 * @buffer: to contain formatted mempolicy string
2092 * @maxlen: length of @buffer
2093 * @pol: pointer to mempolicy to be formatted
2094 * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
2095 *
2065 * Convert a mempolicy into a string. 2096 * Convert a mempolicy into a string.
2066 * Returns the number of characters in buffer (if positive) 2097 * Returns the number of characters in buffer (if positive)
2067 * or an error (negative) 2098 * or an error (negative)
2068 */ 2099 */
2069int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) 2100int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
2070{ 2101{
2071 char *p = buffer; 2102 char *p = buffer;
2072 int l; 2103 int l;
@@ -2100,7 +2131,10 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
2100 case MPOL_BIND: 2131 case MPOL_BIND:
2101 /* Fall through */ 2132 /* Fall through */
2102 case MPOL_INTERLEAVE: 2133 case MPOL_INTERLEAVE:
2103 nodes = pol->v.nodes; 2134 if (no_context)
2135 nodes = pol->w.user_nodemask;
2136 else
2137 nodes = pol->v.nodes;
2104 break; 2138 break;
2105 2139
2106 default: 2140 default:
@@ -2231,7 +2265,7 @@ int show_numa_map(struct seq_file *m, void *v)
2231 return 0; 2265 return 0;
2232 2266
2233 pol = get_vma_policy(priv->task, vma, vma->vm_start); 2267 pol = get_vma_policy(priv->task, vma, vma->vm_start);
2234 mpol_to_str(buffer, sizeof(buffer), pol); 2268 mpol_to_str(buffer, sizeof(buffer), pol, 0);
2235 mpol_cond_put(pol); 2269 mpol_cond_put(pol);
2236 2270
2237 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 2271 seq_printf(m, "%08lx %s", vma->vm_start, buffer);