diff options
author | Hugh Dickins <hughd@google.com> | 2013-01-02 05:01:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-02 12:27:10 -0500 |
commit | f2a07f40dbc603c15f8b06e6ec7f768af67b424f (patch) | |
tree | 965f2db39cbb57c55da3ebfc31249e26eb746906 /mm | |
parent | 128dd1759d96ad36c379240f8b9463e8acfd37a1 (diff) |
tmpfs mempolicy: fix /proc/mounts corrupting memory
Recently I suggested using "mount -o remount,mpol=local /tmp" in NUMA
mempolicy testing. Very nasty. Reading /proc/mounts, /proc/pid/mounts
or /proc/pid/mountinfo may then corrupt one bit of kernel memory, often
in a page table (causing "Bad swap" or "Bad page map" warning or "Bad
pagetable" oops), sometimes in a vm_area_struct or rbnode or somewhere
worse. "mpol=prefer" and "mpol=prefer:Node" are equally toxic.
Recent NUMA enhancements are not to blame: this dates back to 2.6.35,
when commit e17f74af351c "mempolicy: don't call mpol_set_nodemask() when
no_context" skipped mpol_parse_str()'s call to mpol_set_nodemask(),
which used to initialize v.preferred_node, or set MPOL_F_LOCAL in flags.
With slab poisoning, you can then rely on mpol_to_str() to set the bit
for node 0x6b6b, probably in the next page above the caller's stack.
mpol_parse_str() is only called from shmem_parse_options(), where no_context
is always true: so rename that argument to "unused" for now, and remove the !no_context code.
Set v.nodes or v.preferred_node or MPOL_F_LOCAL as mpol_to_str() might
expect. Then mpol_to_str() can ignore its no_context argument also,
the mpol being appropriately initialized whether contextualized or not.
Rename its no_context argument to "unused" too, and let a subsequent patch remove both
(that's not needed for stable backporting, which would involve rejects).
I don't understand why MPOL_LOCAL is described as a pseudo-policy:
it's a reasonable policy which suffers from a confusing implementation
in terms of MPOL_PREFERRED with MPOL_F_LOCAL. I believe this would be
much more robust if MPOL_LOCAL were recognized in switch statements
throughout, MPOL_F_LOCAL deleted, and MPOL_PREFERRED use the (possibly
empty) nodes mask like everyone else, instead of its preferred_node
variant (I presume an optimization from the days before MPOL_LOCAL).
But that would take me too long to get right and fully tested.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mempolicy.c | 64 |
1 files changed, 26 insertions, 38 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d1b315e98627..02c914cca53d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2595,8 +2595,7 @@ void numa_default_policy(void) | |||
2595 | */ | 2595 | */ |
2596 | 2596 | ||
2597 | /* | 2597 | /* |
2598 | * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag | 2598 | * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag. |
2599 | * Used only for mpol_parse_str() and mpol_to_str() | ||
2600 | */ | 2599 | */ |
2601 | static const char * const policy_modes[] = | 2600 | static const char * const policy_modes[] = |
2602 | { | 2601 | { |
@@ -2610,28 +2609,21 @@ static const char * const policy_modes[] = | |||
2610 | 2609 | ||
2611 | #ifdef CONFIG_TMPFS | 2610 | #ifdef CONFIG_TMPFS |
2612 | /** | 2611 | /** |
2613 | * mpol_parse_str - parse string to mempolicy | 2612 | * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. |
2614 | * @str: string containing mempolicy to parse | 2613 | * @str: string containing mempolicy to parse |
2615 | * @mpol: pointer to struct mempolicy pointer, returned on success. | 2614 | * @mpol: pointer to struct mempolicy pointer, returned on success. |
2616 | * @no_context: flag whether to "contextualize" the mempolicy | 2615 | * @unused: redundant argument, to be removed later. |
2617 | * | 2616 | * |
2618 | * Format of input: | 2617 | * Format of input: |
2619 | * <mode>[=<flags>][:<nodelist>] | 2618 | * <mode>[=<flags>][:<nodelist>] |
2620 | * | 2619 | * |
2621 | * if @no_context is true, save the input nodemask in w.user_nodemask in | ||
2622 | * the returned mempolicy. This will be used to "clone" the mempolicy in | ||
2623 | * a specific context [cpuset] at a later time. Used to parse tmpfs mpol | ||
2624 | * mount option. Note that if 'static' or 'relative' mode flags were | ||
2625 | * specified, the input nodemask will already have been saved. Saving | ||
2626 | * it again is redundant, but safe. | ||
2627 | * | ||
2628 | * On success, returns 0, else 1 | 2620 | * On success, returns 0, else 1 |
2629 | */ | 2621 | */ |
2630 | int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | 2622 | int mpol_parse_str(char *str, struct mempolicy **mpol, int unused) |
2631 | { | 2623 | { |
2632 | struct mempolicy *new = NULL; | 2624 | struct mempolicy *new = NULL; |
2633 | unsigned short mode; | 2625 | unsigned short mode; |
2634 | unsigned short uninitialized_var(mode_flags); | 2626 | unsigned short mode_flags; |
2635 | nodemask_t nodes; | 2627 | nodemask_t nodes; |
2636 | char *nodelist = strchr(str, ':'); | 2628 | char *nodelist = strchr(str, ':'); |
2637 | char *flags = strchr(str, '='); | 2629 | char *flags = strchr(str, '='); |
@@ -2719,24 +2711,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
2719 | if (IS_ERR(new)) | 2711 | if (IS_ERR(new)) |
2720 | goto out; | 2712 | goto out; |
2721 | 2713 | ||
2722 | if (no_context) { | 2714 | /* |
2723 | /* save for contextualization */ | 2715 | * Save nodes for mpol_to_str() to show the tmpfs mount options |
2724 | new->w.user_nodemask = nodes; | 2716 | * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. |
2725 | } else { | 2717 | */ |
2726 | int ret; | 2718 | if (mode != MPOL_PREFERRED) |
2727 | NODEMASK_SCRATCH(scratch); | 2719 | new->v.nodes = nodes; |
2728 | if (scratch) { | 2720 | else if (nodelist) |
2729 | task_lock(current); | 2721 | new->v.preferred_node = first_node(nodes); |
2730 | ret = mpol_set_nodemask(new, &nodes, scratch); | 2722 | else |
2731 | task_unlock(current); | 2723 | new->flags |= MPOL_F_LOCAL; |
2732 | } else | 2724 | |
2733 | ret = -ENOMEM; | 2725 | /* |
2734 | NODEMASK_SCRATCH_FREE(scratch); | 2726 | * Save nodes for contextualization: this will be used to "clone" |
2735 | if (ret) { | 2727 | * the mempolicy in a specific context [cpuset] at a later time. |
2736 | mpol_put(new); | 2728 | */ |
2737 | goto out; | 2729 | new->w.user_nodemask = nodes; |
2738 | } | 2730 | |
2739 | } | ||
2740 | err = 0; | 2731 | err = 0; |
2741 | 2732 | ||
2742 | out: | 2733 | out: |
@@ -2756,13 +2747,13 @@ out: | |||
2756 | * @buffer: to contain formatted mempolicy string | 2747 | * @buffer: to contain formatted mempolicy string |
2757 | * @maxlen: length of @buffer | 2748 | * @maxlen: length of @buffer |
2758 | * @pol: pointer to mempolicy to be formatted | 2749 | * @pol: pointer to mempolicy to be formatted |
2759 | * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask | 2750 | * @unused: redundant argument, to be removed later. |
2760 | * | 2751 | * |
2761 | * Convert a mempolicy into a string. | 2752 | * Convert a mempolicy into a string. |
2762 | * Returns the number of characters in buffer (if positive) | 2753 | * Returns the number of characters in buffer (if positive) |
2763 | * or an error (negative) | 2754 | * or an error (negative) |
2764 | */ | 2755 | */ |
2765 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | 2756 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused) |
2766 | { | 2757 | { |
2767 | char *p = buffer; | 2758 | char *p = buffer; |
2768 | int l; | 2759 | int l; |
@@ -2788,7 +2779,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
2788 | case MPOL_PREFERRED: | 2779 | case MPOL_PREFERRED: |
2789 | nodes_clear(nodes); | 2780 | nodes_clear(nodes); |
2790 | if (flags & MPOL_F_LOCAL) | 2781 | if (flags & MPOL_F_LOCAL) |
2791 | mode = MPOL_LOCAL; /* pseudo-policy */ | 2782 | mode = MPOL_LOCAL; |
2792 | else | 2783 | else |
2793 | node_set(pol->v.preferred_node, nodes); | 2784 | node_set(pol->v.preferred_node, nodes); |
2794 | break; | 2785 | break; |
@@ -2796,10 +2787,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
2796 | case MPOL_BIND: | 2787 | case MPOL_BIND: |
2797 | /* Fall through */ | 2788 | /* Fall through */ |
2798 | case MPOL_INTERLEAVE: | 2789 | case MPOL_INTERLEAVE: |
2799 | if (no_context) | 2790 | nodes = pol->v.nodes; |
2800 | nodes = pol->w.user_nodemask; | ||
2801 | else | ||
2802 | nodes = pol->v.nodes; | ||
2803 | break; | 2791 | break; |
2804 | 2792 | ||
2805 | default: | 2793 | default: |