diff options
| author | Hugh Dickins <hughd@google.com> | 2013-01-02 05:01:33 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-02 12:27:10 -0500 |
| commit | f2a07f40dbc603c15f8b06e6ec7f768af67b424f (patch) | |
| tree | 965f2db39cbb57c55da3ebfc31249e26eb746906 | |
| parent | 128dd1759d96ad36c379240f8b9463e8acfd37a1 (diff) | |
tmpfs mempolicy: fix /proc/mounts corrupting memory
Recently I suggested using "mount -o remount,mpol=local /tmp" in NUMA
mempolicy testing. Very nasty. Reading /proc/mounts, /proc/pid/mounts
or /proc/pid/mountinfo may then corrupt one bit of kernel memory, often
in a page table (causing "Bad swap" or "Bad page map" warning or "Bad
pagetable" oops), sometimes in a vm_area_struct or rbnode or somewhere
worse. "mpol=prefer" and "mpol=prefer:Node" are equally toxic.
Recent NUMA enhancements are not to blame: this dates back to 2.6.35,
when commit e17f74af351c "mempolicy: don't call mpol_set_nodemask() when
no_context" skipped mpol_parse_str()'s call to mpol_set_nodemask(),
which used to initialize v.preferred_node, or set MPOL_F_LOCAL in flags.
With slab poisoning, you can then rely on mpol_to_str() to set the bit
for node 0x6b6b, probably in the next page above the caller's stack.
mpol_parse_str() is only called from shmem_parse_options(): no_context
is always true, so rename that argument "unused" for now, and remove the
!no_context code.
Set v.nodes or v.preferred_node or MPOL_F_LOCAL as mpol_to_str() might
expect. Then mpol_to_str() can ignore its no_context argument also,
the mpol being appropriately initialized whether contextualized or not.
Rename its no_context argument to "unused" too, and let a subsequent patch
remove both unused arguments
(that's not needed for stable backporting, which would involve rejects).
I don't understand why MPOL_LOCAL is described as a pseudo-policy:
it's a reasonable policy which suffers from a confusing implementation
in terms of MPOL_PREFERRED with MPOL_F_LOCAL. I believe this would be
much more robust if MPOL_LOCAL were recognized in switch statements
throughout, MPOL_F_LOCAL deleted, and MPOL_PREFERRED use the (possibly
empty) nodes mask like everyone else, instead of its preferred_node
variant (I presume an optimization from the days before MPOL_LOCAL).
But that would take me too long to get right and fully tested.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | mm/mempolicy.c | 64 |
1 files changed, 26 insertions, 38 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1b315e98627..02c914cca53d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
| @@ -2595,8 +2595,7 @@ void numa_default_policy(void) | |||
| 2595 | */ | 2595 | */ |
| 2596 | 2596 | ||
| 2597 | /* | 2597 | /* |
| 2598 | * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag | 2598 | * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag. |
| 2599 | * Used only for mpol_parse_str() and mpol_to_str() | ||
| 2600 | */ | 2599 | */ |
| 2601 | static const char * const policy_modes[] = | 2600 | static const char * const policy_modes[] = |
| 2602 | { | 2601 | { |
| @@ -2610,28 +2609,21 @@ static const char * const policy_modes[] = | |||
| 2610 | 2609 | ||
| 2611 | #ifdef CONFIG_TMPFS | 2610 | #ifdef CONFIG_TMPFS |
| 2612 | /** | 2611 | /** |
| 2613 | * mpol_parse_str - parse string to mempolicy | 2612 | * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. |
| 2614 | * @str: string containing mempolicy to parse | 2613 | * @str: string containing mempolicy to parse |
| 2615 | * @mpol: pointer to struct mempolicy pointer, returned on success. | 2614 | * @mpol: pointer to struct mempolicy pointer, returned on success. |
| 2616 | * @no_context: flag whether to "contextualize" the mempolicy | 2615 | * @unused: redundant argument, to be removed later. |
| 2617 | * | 2616 | * |
| 2618 | * Format of input: | 2617 | * Format of input: |
| 2619 | * <mode>[=<flags>][:<nodelist>] | 2618 | * <mode>[=<flags>][:<nodelist>] |
| 2620 | * | 2619 | * |
| 2621 | * if @no_context is true, save the input nodemask in w.user_nodemask in | ||
| 2622 | * the returned mempolicy. This will be used to "clone" the mempolicy in | ||
| 2623 | * a specific context [cpuset] at a later time. Used to parse tmpfs mpol | ||
| 2624 | * mount option. Note that if 'static' or 'relative' mode flags were | ||
| 2625 | * specified, the input nodemask will already have been saved. Saving | ||
| 2626 | * it again is redundant, but safe. | ||
| 2627 | * | ||
| 2628 | * On success, returns 0, else 1 | 2620 | * On success, returns 0, else 1 |
| 2629 | */ | 2621 | */ |
| 2630 | int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | 2622 | int mpol_parse_str(char *str, struct mempolicy **mpol, int unused) |
| 2631 | { | 2623 | { |
| 2632 | struct mempolicy *new = NULL; | 2624 | struct mempolicy *new = NULL; |
| 2633 | unsigned short mode; | 2625 | unsigned short mode; |
| 2634 | unsigned short uninitialized_var(mode_flags); | 2626 | unsigned short mode_flags; |
| 2635 | nodemask_t nodes; | 2627 | nodemask_t nodes; |
| 2636 | char *nodelist = strchr(str, ':'); | 2628 | char *nodelist = strchr(str, ':'); |
| 2637 | char *flags = strchr(str, '='); | 2629 | char *flags = strchr(str, '='); |
| @@ -2719,24 +2711,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
| 2719 | if (IS_ERR(new)) | 2711 | if (IS_ERR(new)) |
| 2720 | goto out; | 2712 | goto out; |
| 2721 | 2713 | ||
| 2722 | if (no_context) { | 2714 | /* |
| 2723 | /* save for contextualization */ | 2715 | * Save nodes for mpol_to_str() to show the tmpfs mount options |
| 2724 | new->w.user_nodemask = nodes; | 2716 | * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. |
| 2725 | } else { | 2717 | */ |
| 2726 | int ret; | 2718 | if (mode != MPOL_PREFERRED) |
| 2727 | NODEMASK_SCRATCH(scratch); | 2719 | new->v.nodes = nodes; |
| 2728 | if (scratch) { | 2720 | else if (nodelist) |
| 2729 | task_lock(current); | 2721 | new->v.preferred_node = first_node(nodes); |
| 2730 | ret = mpol_set_nodemask(new, &nodes, scratch); | 2722 | else |
| 2731 | task_unlock(current); | 2723 | new->flags |= MPOL_F_LOCAL; |
| 2732 | } else | 2724 | |
| 2733 | ret = -ENOMEM; | 2725 | /* |
| 2734 | NODEMASK_SCRATCH_FREE(scratch); | 2726 | * Save nodes for contextualization: this will be used to "clone" |
| 2735 | if (ret) { | 2727 | * the mempolicy in a specific context [cpuset] at a later time. |
| 2736 | mpol_put(new); | 2728 | */ |
| 2737 | goto out; | 2729 | new->w.user_nodemask = nodes; |
| 2738 | } | 2730 | |
| 2739 | } | ||
| 2740 | err = 0; | 2731 | err = 0; |
| 2741 | 2732 | ||
| 2742 | out: | 2733 | out: |
| @@ -2756,13 +2747,13 @@ out: | |||
| 2756 | * @buffer: to contain formatted mempolicy string | 2747 | * @buffer: to contain formatted mempolicy string |
| 2757 | * @maxlen: length of @buffer | 2748 | * @maxlen: length of @buffer |
| 2758 | * @pol: pointer to mempolicy to be formatted | 2749 | * @pol: pointer to mempolicy to be formatted |
| 2759 | * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask | 2750 | * @unused: redundant argument, to be removed later. |
| 2760 | * | 2751 | * |
| 2761 | * Convert a mempolicy into a string. | 2752 | * Convert a mempolicy into a string. |
| 2762 | * Returns the number of characters in buffer (if positive) | 2753 | * Returns the number of characters in buffer (if positive) |
| 2763 | * or an error (negative) | 2754 | * or an error (negative) |
| 2764 | */ | 2755 | */ |
| 2765 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | 2756 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused) |
| 2766 | { | 2757 | { |
| 2767 | char *p = buffer; | 2758 | char *p = buffer; |
| 2768 | int l; | 2759 | int l; |
| @@ -2788,7 +2779,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
| 2788 | case MPOL_PREFERRED: | 2779 | case MPOL_PREFERRED: |
| 2789 | nodes_clear(nodes); | 2780 | nodes_clear(nodes); |
| 2790 | if (flags & MPOL_F_LOCAL) | 2781 | if (flags & MPOL_F_LOCAL) |
| 2791 | mode = MPOL_LOCAL; /* pseudo-policy */ | 2782 | mode = MPOL_LOCAL; |
| 2792 | else | 2783 | else |
| 2793 | node_set(pol->v.preferred_node, nodes); | 2784 | node_set(pol->v.preferred_node, nodes); |
| 2794 | break; | 2785 | break; |
| @@ -2796,10 +2787,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
| 2796 | case MPOL_BIND: | 2787 | case MPOL_BIND: |
| 2797 | /* Fall through */ | 2788 | /* Fall through */ |
| 2798 | case MPOL_INTERLEAVE: | 2789 | case MPOL_INTERLEAVE: |
| 2799 | if (no_context) | 2790 | nodes = pol->v.nodes; |
| 2800 | nodes = pol->w.user_nodemask; | ||
| 2801 | else | ||
| 2802 | nodes = pol->v.nodes; | ||
| 2803 | break; | 2791 | break; |
| 2804 | 2792 | ||
| 2805 | default: | 2793 | default: |
