diff options
author | Daniel Jordan <daniel.m.jordan@oracle.com> | 2019-03-05 18:48:19 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-06 00:07:19 -0500 |
commit | c10d38cc8d3e43f946b6c2bf4602c86791587f30 (patch) | |
tree | fceb58ebe239d9c4142ed1f0062616fc2f0c9da5 /mm/swapfile.c | |
parent | 060f005f074791ec15e3ea111a0b0cac28abab06 (diff) |
mm, swap: bounds check swap_info array accesses to avoid NULL derefs
Dan Carpenter reports a potential NULL dereference in
get_swap_page_of_type:
  Smatch complains that the NULL checks on "si" aren't consistent. This
  seems like a real bug because we have not ensured that the type is
  valid and so "si" can be NULL.
Add the missing check for NULL, taking care to use a read barrier to
ensure CPU1 observes CPU0's updates in the correct order:
    CPU0                                    CPU1
    alloc_swap_info()                       if (type >= nr_swapfiles)
      swap_info[type] = p                     /* handle invalid entry */
      smp_wmb()                             smp_rmb()
      ++nr_swapfiles                        p = swap_info[type]
Without smp_rmb, CPU1 might observe CPU0's write to nr_swapfiles before
CPU0's write to swap_info[type] and read NULL from swap_info[type].
Ying Huang noticed other places in swapfile.c don't order these reads
properly. Introduce swap_type_to_swap_info to encourage correct usage.
Use READ_ONCE and WRITE_ONCE to follow the Linux Kernel Memory Model
(see tools/memory-model/Documentation/explanation.txt).
This ordering need not be enforced in places where swap_lock is held
(e.g. si_swapinfo) because swap_lock serializes updates to nr_swapfiles
and the swap_info array.
Link: http://lkml.kernel.org/r/20190131024410.29859-1-daniel.m.jordan@oracle.com
Fixes: ec8acf20afb8 ("swap: add per-partition lock for swapfile")
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 51 |
1 files changed, 29 insertions, 22 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 6de46984d59d..57e9b1b31d55 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -98,6 +98,15 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0); | |||
98 | 98 | ||
99 | atomic_t nr_rotate_swap = ATOMIC_INIT(0); | 99 | atomic_t nr_rotate_swap = ATOMIC_INIT(0); |
100 | 100 | ||
101 | static struct swap_info_struct *swap_type_to_swap_info(int type) | ||
102 | { | ||
103 | if (type >= READ_ONCE(nr_swapfiles)) | ||
104 | return NULL; | ||
105 | |||
106 | smp_rmb(); /* Pairs with smp_wmb in alloc_swap_info. */ | ||
107 | return READ_ONCE(swap_info[type]); | ||
108 | } | ||
109 | |||
101 | static inline unsigned char swap_count(unsigned char ent) | 110 | static inline unsigned char swap_count(unsigned char ent) |
102 | { | 111 | { |
103 | return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ | 112 | return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ |
@@ -1044,12 +1053,14 @@ noswap: | |||
1044 | /* The only caller of this function is now suspend routine */ | 1053 | /* The only caller of this function is now suspend routine */ |
1045 | swp_entry_t get_swap_page_of_type(int type) | 1054 | swp_entry_t get_swap_page_of_type(int type) |
1046 | { | 1055 | { |
1047 | struct swap_info_struct *si; | 1056 | struct swap_info_struct *si = swap_type_to_swap_info(type); |
1048 | pgoff_t offset; | 1057 | pgoff_t offset; |
1049 | 1058 | ||
1050 | si = swap_info[type]; | 1059 | if (!si) |
1060 | goto fail; | ||
1061 | |||
1051 | spin_lock(&si->lock); | 1062 | spin_lock(&si->lock); |
1052 | if (si && (si->flags & SWP_WRITEOK)) { | 1063 | if (si->flags & SWP_WRITEOK) { |
1053 | atomic_long_dec(&nr_swap_pages); | 1064 | atomic_long_dec(&nr_swap_pages); |
1054 | /* This is called for allocating swap entry, not cache */ | 1065 | /* This is called for allocating swap entry, not cache */ |
1055 | offset = scan_swap_map(si, 1); | 1066 | offset = scan_swap_map(si, 1); |
@@ -1060,6 +1071,7 @@ swp_entry_t get_swap_page_of_type(int type) | |||
1060 | atomic_long_inc(&nr_swap_pages); | 1071 | atomic_long_inc(&nr_swap_pages); |
1061 | } | 1072 | } |
1062 | spin_unlock(&si->lock); | 1073 | spin_unlock(&si->lock); |
1074 | fail: | ||
1063 | return (swp_entry_t) {0}; | 1075 | return (swp_entry_t) {0}; |
1064 | } | 1076 | } |
1065 | 1077 | ||
@@ -1071,9 +1083,9 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) | |||
1071 | if (!entry.val) | 1083 | if (!entry.val) |
1072 | goto out; | 1084 | goto out; |
1073 | type = swp_type(entry); | 1085 | type = swp_type(entry); |
1074 | if (type >= nr_swapfiles) | 1086 | p = swap_type_to_swap_info(type); |
1087 | if (!p) | ||
1075 | goto bad_nofile; | 1088 | goto bad_nofile; |
1076 | p = swap_info[type]; | ||
1077 | if (!(p->flags & SWP_USED)) | 1089 | if (!(p->flags & SWP_USED)) |
1078 | goto bad_device; | 1090 | goto bad_device; |
1079 | offset = swp_offset(entry); | 1091 | offset = swp_offset(entry); |
@@ -1697,10 +1709,9 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
1697 | sector_t swapdev_block(int type, pgoff_t offset) | 1709 | sector_t swapdev_block(int type, pgoff_t offset) |
1698 | { | 1710 | { |
1699 | struct block_device *bdev; | 1711 | struct block_device *bdev; |
1712 | struct swap_info_struct *si = swap_type_to_swap_info(type); | ||
1700 | 1713 | ||
1701 | if ((unsigned int)type >= nr_swapfiles) | 1714 | if (!si || !(si->flags & SWP_WRITEOK)) |
1702 | return 0; | ||
1703 | if (!(swap_info[type]->flags & SWP_WRITEOK)) | ||
1704 | return 0; | 1715 | return 0; |
1705 | return map_swap_entry(swp_entry(type, offset), &bdev); | 1716 | return map_swap_entry(swp_entry(type, offset), &bdev); |
1706 | } | 1717 | } |
@@ -2151,7 +2162,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |||
2151 | struct swap_extent *se; | 2162 | struct swap_extent *se; |
2152 | pgoff_t offset; | 2163 | pgoff_t offset; |
2153 | 2164 | ||
2154 | sis = swap_info[swp_type(entry)]; | 2165 | sis = swp_swap_info(entry); |
2155 | *bdev = sis->bdev; | 2166 | *bdev = sis->bdev; |
2156 | 2167 | ||
2157 | offset = swp_offset(entry); | 2168 | offset = swp_offset(entry); |
@@ -2593,9 +2604,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos) | |||
2593 | if (!l) | 2604 | if (!l) |
2594 | return SEQ_START_TOKEN; | 2605 | return SEQ_START_TOKEN; |
2595 | 2606 | ||
2596 | for (type = 0; type < nr_swapfiles; type++) { | 2607 | for (type = 0; (si = swap_type_to_swap_info(type)); type++) { |
2597 | smp_rmb(); /* read nr_swapfiles before swap_info[type] */ | ||
2598 | si = swap_info[type]; | ||
2599 | if (!(si->flags & SWP_USED) || !si->swap_map) | 2608 | if (!(si->flags & SWP_USED) || !si->swap_map) |
2600 | continue; | 2609 | continue; |
2601 | if (!--l) | 2610 | if (!--l) |
@@ -2615,9 +2624,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos) | |||
2615 | else | 2624 | else |
2616 | type = si->type + 1; | 2625 | type = si->type + 1; |
2617 | 2626 | ||
2618 | for (; type < nr_swapfiles; type++) { | 2627 | for (; (si = swap_type_to_swap_info(type)); type++) { |
2619 | smp_rmb(); /* read nr_swapfiles before swap_info[type] */ | ||
2620 | si = swap_info[type]; | ||
2621 | if (!(si->flags & SWP_USED) || !si->swap_map) | 2628 | if (!(si->flags & SWP_USED) || !si->swap_map) |
2622 | continue; | 2629 | continue; |
2623 | ++*pos; | 2630 | ++*pos; |
@@ -2724,14 +2731,14 @@ static struct swap_info_struct *alloc_swap_info(void) | |||
2724 | } | 2731 | } |
2725 | if (type >= nr_swapfiles) { | 2732 | if (type >= nr_swapfiles) { |
2726 | p->type = type; | 2733 | p->type = type; |
2727 | swap_info[type] = p; | 2734 | WRITE_ONCE(swap_info[type], p); |
2728 | /* | 2735 | /* |
2729 | * Write swap_info[type] before nr_swapfiles, in case a | 2736 | * Write swap_info[type] before nr_swapfiles, in case a |
2730 | * racing procfs swap_start() or swap_next() is reading them. | 2737 | * racing procfs swap_start() or swap_next() is reading them. |
2731 | * (We never shrink nr_swapfiles, we never free this entry.) | 2738 | * (We never shrink nr_swapfiles, we never free this entry.) |
2732 | */ | 2739 | */ |
2733 | smp_wmb(); | 2740 | smp_wmb(); |
2734 | nr_swapfiles++; | 2741 | WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1); |
2735 | } else { | 2742 | } else { |
2736 | kvfree(p); | 2743 | kvfree(p); |
2737 | p = swap_info[type]; | 2744 | p = swap_info[type]; |
@@ -3251,7 +3258,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) | |||
3251 | { | 3258 | { |
3252 | struct swap_info_struct *p; | 3259 | struct swap_info_struct *p; |
3253 | struct swap_cluster_info *ci; | 3260 | struct swap_cluster_info *ci; |
3254 | unsigned long offset, type; | 3261 | unsigned long offset; |
3255 | unsigned char count; | 3262 | unsigned char count; |
3256 | unsigned char has_cache; | 3263 | unsigned char has_cache; |
3257 | int err = -EINVAL; | 3264 | int err = -EINVAL; |
@@ -3259,10 +3266,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) | |||
3259 | if (non_swap_entry(entry)) | 3266 | if (non_swap_entry(entry)) |
3260 | goto out; | 3267 | goto out; |
3261 | 3268 | ||
3262 | type = swp_type(entry); | 3269 | p = swp_swap_info(entry); |
3263 | if (type >= nr_swapfiles) | 3270 | if (!p) |
3264 | goto bad_file; | 3271 | goto bad_file; |
3265 | p = swap_info[type]; | 3272 | |
3266 | offset = swp_offset(entry); | 3273 | offset = swp_offset(entry); |
3267 | if (unlikely(offset >= p->max)) | 3274 | if (unlikely(offset >= p->max)) |
3268 | goto out; | 3275 | goto out; |
@@ -3359,7 +3366,7 @@ int swapcache_prepare(swp_entry_t entry) | |||
3359 | 3366 | ||
3360 | struct swap_info_struct *swp_swap_info(swp_entry_t entry) | 3367 | struct swap_info_struct *swp_swap_info(swp_entry_t entry) |
3361 | { | 3368 | { |
3362 | return swap_info[swp_type(entry)]; | 3369 | return swap_type_to_swap_info(swp_type(entry)); |
3363 | } | 3370 | } |
3364 | 3371 | ||
3365 | struct swap_info_struct *page_swap_info(struct page *page) | 3372 | struct swap_info_struct *page_swap_info(struct page *page) |