aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
author: Hugh Dickins <hughd@google.com> 2013-01-02 05:01:33 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-01-02 12:27:10 -0500
commit: f2a07f40dbc603c15f8b06e6ec7f768af67b424f (patch)
tree: 965f2db39cbb57c55da3ebfc31249e26eb746906 /mm/mempolicy.c
parent: 128dd1759d96ad36c379240f8b9463e8acfd37a1 (diff)
tmpfs mempolicy: fix /proc/mounts corrupting memory
Recently I suggested using "mount -o remount,mpol=local /tmp" in NUMA mempolicy testing. Very nasty. Reading /proc/mounts, /proc/pid/mounts or /proc/pid/mountinfo may then corrupt one bit of kernel memory, often in a page table (causing "Bad swap" or "Bad page map" warning or "Bad pagetable" oops), sometimes in a vm_area_struct or rbnode or somewhere worse. "mpol=prefer" and "mpol=prefer:Node" are equally toxic.

Recent NUMA enhancements are not to blame: this dates back to 2.6.35, when commit e17f74af351c "mempolicy: don't call mpol_set_nodemask() when no_context" skipped mpol_parse_str()'s call to mpol_set_nodemask(), which used to initialize v.preferred_node, or set MPOL_F_LOCAL in flags. With slab poisoning, you can then rely on mpol_to_str() to set the bit for node 0x6b6b, probably in the next page above the caller's stack.

mpol_parse_str() is only called from shmem_parse_options(): no_context is always true, so rename that argument to "unused" for now, and remove the !no_context code. Set v.nodes or v.preferred_node or MPOL_F_LOCAL as mpol_to_str() might expect. Then mpol_to_str() can ignore its no_context argument also, the mpol being appropriately initialized whether contextualized or not. Rename its no_context to "unused" too, and let a subsequent patch remove them (that's not needed for stable backporting, which would involve rejects).

I don't understand why MPOL_LOCAL is described as a pseudo-policy: it's a reasonable policy which suffers from a confusing implementation in terms of MPOL_PREFERRED with MPOL_F_LOCAL. I believe this would be much more robust if MPOL_LOCAL were recognized in switch statements throughout, MPOL_F_LOCAL deleted, and MPOL_PREFERRED use the (possibly empty) nodes mask like everyone else, instead of its preferred_node variant (I presume an optimization from the days before MPOL_LOCAL). But that would take me too long to get right and fully tested.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- mm/mempolicy.c 64
1 file changed, 26 insertions, 38 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1b315e98627..02c914cca53d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2595,8 +2595,7 @@ void numa_default_policy(void)
2595 */ 2595 */
2596 2596
2597/* 2597/*
2598 * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag 2598 * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
2599 * Used only for mpol_parse_str() and mpol_to_str()
2600 */ 2599 */
2601static const char * const policy_modes[] = 2600static const char * const policy_modes[] =
2602{ 2601{
@@ -2610,28 +2609,21 @@ static const char * const policy_modes[] =
2610 2609
2611#ifdef CONFIG_TMPFS 2610#ifdef CONFIG_TMPFS
2612/** 2611/**
2613 * mpol_parse_str - parse string to mempolicy 2612 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
2614 * @str: string containing mempolicy to parse 2613 * @str: string containing mempolicy to parse
2615 * @mpol: pointer to struct mempolicy pointer, returned on success. 2614 * @mpol: pointer to struct mempolicy pointer, returned on success.
2616 * @no_context: flag whether to "contextualize" the mempolicy 2615 * @unused: redundant argument, to be removed later.
2617 * 2616 *
2618 * Format of input: 2617 * Format of input:
2619 * <mode>[=<flags>][:<nodelist>] 2618 * <mode>[=<flags>][:<nodelist>]
2620 * 2619 *
2621 * if @no_context is true, save the input nodemask in w.user_nodemask in
2622 * the returned mempolicy. This will be used to "clone" the mempolicy in
2623 * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
2624 * mount option. Note that if 'static' or 'relative' mode flags were
2625 * specified, the input nodemask will already have been saved. Saving
2626 * it again is redundant, but safe.
2627 *
2628 * On success, returns 0, else 1 2620 * On success, returns 0, else 1
2629 */ 2621 */
2630int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) 2622int mpol_parse_str(char *str, struct mempolicy **mpol, int unused)
2631{ 2623{
2632 struct mempolicy *new = NULL; 2624 struct mempolicy *new = NULL;
2633 unsigned short mode; 2625 unsigned short mode;
2634 unsigned short uninitialized_var(mode_flags); 2626 unsigned short mode_flags;
2635 nodemask_t nodes; 2627 nodemask_t nodes;
2636 char *nodelist = strchr(str, ':'); 2628 char *nodelist = strchr(str, ':');
2637 char *flags = strchr(str, '='); 2629 char *flags = strchr(str, '=');
@@ -2719,24 +2711,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
2719 if (IS_ERR(new)) 2711 if (IS_ERR(new))
2720 goto out; 2712 goto out;
2721 2713
2722 if (no_context) { 2714 /*
2723 /* save for contextualization */ 2715 * Save nodes for mpol_to_str() to show the tmpfs mount options
2724 new->w.user_nodemask = nodes; 2716 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
2725 } else { 2717 */
2726 int ret; 2718 if (mode != MPOL_PREFERRED)
2727 NODEMASK_SCRATCH(scratch); 2719 new->v.nodes = nodes;
2728 if (scratch) { 2720 else if (nodelist)
2729 task_lock(current); 2721 new->v.preferred_node = first_node(nodes);
2730 ret = mpol_set_nodemask(new, &nodes, scratch); 2722 else
2731 task_unlock(current); 2723 new->flags |= MPOL_F_LOCAL;
2732 } else 2724
2733 ret = -ENOMEM; 2725 /*
2734 NODEMASK_SCRATCH_FREE(scratch); 2726 * Save nodes for contextualization: this will be used to "clone"
2735 if (ret) { 2727 * the mempolicy in a specific context [cpuset] at a later time.
2736 mpol_put(new); 2728 */
2737 goto out; 2729 new->w.user_nodemask = nodes;
2738 } 2730
2739 }
2740 err = 0; 2731 err = 0;
2741 2732
2742out: 2733out:
@@ -2756,13 +2747,13 @@ out:
2756 * @buffer: to contain formatted mempolicy string 2747 * @buffer: to contain formatted mempolicy string
2757 * @maxlen: length of @buffer 2748 * @maxlen: length of @buffer
2758 * @pol: pointer to mempolicy to be formatted 2749 * @pol: pointer to mempolicy to be formatted
2759 * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask 2750 * @unused: redundant argument, to be removed later.
2760 * 2751 *
2761 * Convert a mempolicy into a string. 2752 * Convert a mempolicy into a string.
2762 * Returns the number of characters in buffer (if positive) 2753 * Returns the number of characters in buffer (if positive)
2763 * or an error (negative) 2754 * or an error (negative)
2764 */ 2755 */
2765int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) 2756int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused)
2766{ 2757{
2767 char *p = buffer; 2758 char *p = buffer;
2768 int l; 2759 int l;
@@ -2788,7 +2779,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
2788 case MPOL_PREFERRED: 2779 case MPOL_PREFERRED:
2789 nodes_clear(nodes); 2780 nodes_clear(nodes);
2790 if (flags & MPOL_F_LOCAL) 2781 if (flags & MPOL_F_LOCAL)
2791 mode = MPOL_LOCAL; /* pseudo-policy */ 2782 mode = MPOL_LOCAL;
2792 else 2783 else
2793 node_set(pol->v.preferred_node, nodes); 2784 node_set(pol->v.preferred_node, nodes);
2794 break; 2785 break;
@@ -2796,10 +2787,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
2796 case MPOL_BIND: 2787 case MPOL_BIND:
2797 /* Fall through */ 2788 /* Fall through */
2798 case MPOL_INTERLEAVE: 2789 case MPOL_INTERLEAVE:
2799 if (no_context) 2790 nodes = pol->v.nodes;
2800 nodes = pol->w.user_nodemask;
2801 else
2802 nodes = pol->v.nodes;
2803 break; 2791 break;
2804 2792
2805 default: 2793 default: