summary refs log tree commit diff stats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
author Daniel Jordan <daniel.m.jordan@oracle.com> 2019-03-05 18:48:19 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-06 00:07:19 -0500
commit c10d38cc8d3e43f946b6c2bf4602c86791587f30 (patch)
tree fceb58ebe239d9c4142ed1f0062616fc2f0c9da5 /mm/swapfile.c
parent 060f005f074791ec15e3ea111a0b0cac28abab06 (diff)
mm, swap: bounds check swap_info array accesses to avoid NULL derefs
Dan Carpenter reports a potential NULL dereference in
get_swap_page_of_type:

  Smatch complains that the NULL checks on "si" aren't consistent. This
  seems like a real bug because we have not ensured that the type is
  valid and so "si" can be NULL.

Add the missing check for NULL, taking care to use a read barrier to
ensure CPU1 observes CPU0's updates in the correct order:

     CPU0                           CPU1
     alloc_swap_info()              if (type >= nr_swapfiles)
       swap_info[type] = p              /* handle invalid entry */
       smp_wmb()                    smp_rmb()
       ++nr_swapfiles               p = swap_info[type]

Without smp_rmb, CPU1 might observe CPU0's write to nr_swapfiles before
CPU0's write to swap_info[type] and read NULL from swap_info[type].

Ying Huang noticed other places in swapfile.c don't order these reads
properly. Introduce swap_type_to_swap_info to encourage correct usage.

Use READ_ONCE and WRITE_ONCE to follow the Linux Kernel Memory Model
(see tools/memory-model/Documentation/explanation.txt).

This ordering need not be enforced in places where swap_lock is held
(e.g. si_swapinfo) because swap_lock serializes updates to nr_swapfiles
and the swap_info array.

Link: http://lkml.kernel.org/r/20190131024410.29859-1-daniel.m.jordan@oracle.com
Fixes: ec8acf20afb8 ("swap: add per-partition lock for swapfile")
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tejun Heo <tj@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- mm/swapfile.c 51
1 files changed, 29 insertions, 22 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6de46984d59d..57e9b1b31d55 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -98,6 +98,15 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0);
98 98
99atomic_t nr_rotate_swap = ATOMIC_INIT(0); 99atomic_t nr_rotate_swap = ATOMIC_INIT(0);
100 100
101static struct swap_info_struct *swap_type_to_swap_info(int type)
102{
103 if (type >= READ_ONCE(nr_swapfiles))
104 return NULL;
105
106 smp_rmb(); /* Pairs with smp_wmb in alloc_swap_info. */
107 return READ_ONCE(swap_info[type]);
108}
109
101static inline unsigned char swap_count(unsigned char ent) 110static inline unsigned char swap_count(unsigned char ent)
102{ 111{
103 return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ 112 return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */
@@ -1044,12 +1053,14 @@ noswap:
1044/* The only caller of this function is now suspend routine */ 1053/* The only caller of this function is now suspend routine */
1045swp_entry_t get_swap_page_of_type(int type) 1054swp_entry_t get_swap_page_of_type(int type)
1046{ 1055{
1047 struct swap_info_struct *si; 1056 struct swap_info_struct *si = swap_type_to_swap_info(type);
1048 pgoff_t offset; 1057 pgoff_t offset;
1049 1058
1050 si = swap_info[type]; 1059 if (!si)
1060 goto fail;
1061
1051 spin_lock(&si->lock); 1062 spin_lock(&si->lock);
1052 if (si && (si->flags & SWP_WRITEOK)) { 1063 if (si->flags & SWP_WRITEOK) {
1053 atomic_long_dec(&nr_swap_pages); 1064 atomic_long_dec(&nr_swap_pages);
1054 /* This is called for allocating swap entry, not cache */ 1065 /* This is called for allocating swap entry, not cache */
1055 offset = scan_swap_map(si, 1); 1066 offset = scan_swap_map(si, 1);
@@ -1060,6 +1071,7 @@ swp_entry_t get_swap_page_of_type(int type)
1060 atomic_long_inc(&nr_swap_pages); 1071 atomic_long_inc(&nr_swap_pages);
1061 } 1072 }
1062 spin_unlock(&si->lock); 1073 spin_unlock(&si->lock);
1074fail:
1063 return (swp_entry_t) {0}; 1075 return (swp_entry_t) {0};
1064} 1076}
1065 1077
@@ -1071,9 +1083,9 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
1071 if (!entry.val) 1083 if (!entry.val)
1072 goto out; 1084 goto out;
1073 type = swp_type(entry); 1085 type = swp_type(entry);
1074 if (type >= nr_swapfiles) 1086 p = swap_type_to_swap_info(type);
1087 if (!p)
1075 goto bad_nofile; 1088 goto bad_nofile;
1076 p = swap_info[type];
1077 if (!(p->flags & SWP_USED)) 1089 if (!(p->flags & SWP_USED))
1078 goto bad_device; 1090 goto bad_device;
1079 offset = swp_offset(entry); 1091 offset = swp_offset(entry);
@@ -1697,10 +1709,9 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
1697sector_t swapdev_block(int type, pgoff_t offset) 1709sector_t swapdev_block(int type, pgoff_t offset)
1698{ 1710{
1699 struct block_device *bdev; 1711 struct block_device *bdev;
1712 struct swap_info_struct *si = swap_type_to_swap_info(type);
1700 1713
1701 if ((unsigned int)type >= nr_swapfiles) 1714 if (!si || !(si->flags & SWP_WRITEOK))
1702 return 0;
1703 if (!(swap_info[type]->flags & SWP_WRITEOK))
1704 return 0; 1715 return 0;
1705 return map_swap_entry(swp_entry(type, offset), &bdev); 1716 return map_swap_entry(swp_entry(type, offset), &bdev);
1706} 1717}
@@ -2151,7 +2162,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
2151 struct swap_extent *se; 2162 struct swap_extent *se;
2152 pgoff_t offset; 2163 pgoff_t offset;
2153 2164
2154 sis = swap_info[swp_type(entry)]; 2165 sis = swp_swap_info(entry);
2155 *bdev = sis->bdev; 2166 *bdev = sis->bdev;
2156 2167
2157 offset = swp_offset(entry); 2168 offset = swp_offset(entry);
@@ -2593,9 +2604,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
2593 if (!l) 2604 if (!l)
2594 return SEQ_START_TOKEN; 2605 return SEQ_START_TOKEN;
2595 2606
2596 for (type = 0; type < nr_swapfiles; type++) { 2607 for (type = 0; (si = swap_type_to_swap_info(type)); type++) {
2597 smp_rmb(); /* read nr_swapfiles before swap_info[type] */
2598 si = swap_info[type];
2599 if (!(si->flags & SWP_USED) || !si->swap_map) 2608 if (!(si->flags & SWP_USED) || !si->swap_map)
2600 continue; 2609 continue;
2601 if (!--l) 2610 if (!--l)
@@ -2615,9 +2624,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
2615 else 2624 else
2616 type = si->type + 1; 2625 type = si->type + 1;
2617 2626
2618 for (; type < nr_swapfiles; type++) { 2627 for (; (si = swap_type_to_swap_info(type)); type++) {
2619 smp_rmb(); /* read nr_swapfiles before swap_info[type] */
2620 si = swap_info[type];
2621 if (!(si->flags & SWP_USED) || !si->swap_map) 2628 if (!(si->flags & SWP_USED) || !si->swap_map)
2622 continue; 2629 continue;
2623 ++*pos; 2630 ++*pos;
@@ -2724,14 +2731,14 @@ static struct swap_info_struct *alloc_swap_info(void)
2724 } 2731 }
2725 if (type >= nr_swapfiles) { 2732 if (type >= nr_swapfiles) {
2726 p->type = type; 2733 p->type = type;
2727 swap_info[type] = p; 2734 WRITE_ONCE(swap_info[type], p);
2728 /* 2735 /*
2729 * Write swap_info[type] before nr_swapfiles, in case a 2736 * Write swap_info[type] before nr_swapfiles, in case a
2730 * racing procfs swap_start() or swap_next() is reading them. 2737 * racing procfs swap_start() or swap_next() is reading them.
2731 * (We never shrink nr_swapfiles, we never free this entry.) 2738 * (We never shrink nr_swapfiles, we never free this entry.)
2732 */ 2739 */
2733 smp_wmb(); 2740 smp_wmb();
2734 nr_swapfiles++; 2741 WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
2735 } else { 2742 } else {
2736 kvfree(p); 2743 kvfree(p);
2737 p = swap_info[type]; 2744 p = swap_info[type];
@@ -3251,7 +3258,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3251{ 3258{
3252 struct swap_info_struct *p; 3259 struct swap_info_struct *p;
3253 struct swap_cluster_info *ci; 3260 struct swap_cluster_info *ci;
3254 unsigned long offset, type; 3261 unsigned long offset;
3255 unsigned char count; 3262 unsigned char count;
3256 unsigned char has_cache; 3263 unsigned char has_cache;
3257 int err = -EINVAL; 3264 int err = -EINVAL;
@@ -3259,10 +3266,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3259 if (non_swap_entry(entry)) 3266 if (non_swap_entry(entry))
3260 goto out; 3267 goto out;
3261 3268
3262 type = swp_type(entry); 3269 p = swp_swap_info(entry);
3263 if (type >= nr_swapfiles) 3270 if (!p)
3264 goto bad_file; 3271 goto bad_file;
3265 p = swap_info[type]; 3272
3266 offset = swp_offset(entry); 3273 offset = swp_offset(entry);
3267 if (unlikely(offset >= p->max)) 3274 if (unlikely(offset >= p->max))
3268 goto out; 3275 goto out;
@@ -3359,7 +3366,7 @@ int swapcache_prepare(swp_entry_t entry)
3359 3366
3360struct swap_info_struct *swp_swap_info(swp_entry_t entry) 3367struct swap_info_struct *swp_swap_info(swp_entry_t entry)
3361{ 3368{
3362 return swap_info[swp_type(entry)]; 3369 return swap_type_to_swap_info(swp_type(entry));
3363} 3370}
3364 3371
3365struct swap_info_struct *page_swap_info(struct page *page) 3372struct swap_info_struct *page_swap_info(struct page *page)