author	Tim Chen <tim.c.chen@linux.intel.com>	2017-02-22 18:45:39 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 19:41:30 -0500
commit	67afa38e012e9581b9b42f2a41dfc56b1280794d (patch)
tree	704ee023982fdcdc053c4950e26656888ca289f5
parent	7c00bafee87c7bac7ed9eced7c161f8e5332cb4e (diff)
mm/swap: add cache for swap slots allocation
We add per-CPU caches for swap slots that can be allocated and freed quickly without the need to touch the swap info lock.

Two separate caches are maintained for swap slots allocated and swap slots returned. This is to allow the swap slots to be returned to the global pool in a batch so they will have a chance to be coalesced with other slots in a cluster. We do not reuse the slots that are returned right away, as it may increase fragmentation of the slots.

The swap allocation cache is protected by a mutex as we may sleep when searching for empty slots in the cache. The swap free cache is protected by a spin lock as we cannot sleep in the free path.

We refill the swap slots cache when we run out of slots, and we disable the swap slots cache and drain the slots if the global number of slots falls below a low watermark threshold. We re-enable the cache again when the slots available are above a high watermark.

[ying.huang@intel.com: use raw_cpu_ptr over this_cpu_ptr for swap slots access]
[tim.c.chen@linux.intel.com: add comments on locks in swap_slots.h]
Link: http://lkml.kernel.org/r/20170118180327.GA24225@linux.intel.com
Link: http://lkml.kernel.org/r/35de301a4eaa8daa2977de6e987f2c154385eb66.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/swap.h		4
-rw-r--r--	include/linux/swap_slots.h	28
-rw-r--r--	mm/Makefile			2
-rw-r--r--	mm/swap_slots.c			342
-rw-r--r--	mm/swap_state.c			1
-rw-r--r--	mm/swapfile.c			26
6 files changed, 391 insertions, 12 deletions
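
To make the design in the commit message concrete before reading the patch, here is a minimal, self-contained sketch of the alloc-side idea in plain user-space C. It is illustrative only, not the kernel code: global_pool_get() and CACHE_SIZE are hypothetical stand-ins for get_swap_pages() and SWAP_SLOTS_CACHE_SIZE in the patch below. Slots are handed out from a small local array under a mutex and the array is refilled from the global pool one batch at a time, so the global pool is touched once per batch rather than once per slot.

#include <pthread.h>
#include <stdio.h>

#define CACHE_SIZE 64

struct slot_cache {
	pthread_mutex_t alloc_lock;          /* protects slots, nr, cur */
	unsigned long   slots[CACHE_SIZE];
	int             nr;                  /* slots remaining in the cache */
	int             cur;                 /* index of next slot to hand out */
};

/* Hypothetical stand-in for the global pool allocator (get_swap_pages()). */
static int global_pool_get(int n, unsigned long *out)
{
	static unsigned long next = 1;

	for (int i = 0; i < n; i++)
		out[i] = next++;
	return n;                            /* number of slots actually obtained */
}

/* Fast path: pop from the local cache; refill in one batch when empty. */
static unsigned long cache_get_slot(struct slot_cache *c)
{
	unsigned long slot = 0;

	pthread_mutex_lock(&c->alloc_lock);
	if (!c->nr) {                        /* cache empty: batch refill */
		c->cur = 0;
		c->nr = global_pool_get(CACHE_SIZE, c->slots);
	}
	if (c->nr) {
		slot = c->slots[c->cur++];
		c->nr--;
	}
	pthread_mutex_unlock(&c->alloc_lock);
	return slot;                         /* 0 means the pool is exhausted */
}

int main(void)
{
	struct slot_cache c = { .nr = 0, .cur = 0 };

	pthread_mutex_init(&c.alloc_lock, NULL);
	/* Only the first call pays the cost of touching the global pool. */
	for (int i = 0; i < 3; i++)
		printf("got slot %lu\n", cache_get_slot(&c));
	pthread_mutex_destroy(&c.alloc_lock);
	return 0;
}
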
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bcc0b18f96d2..45e91dd6716d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -372,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
+extern bool has_usable_swap(void);
 
 /* Swap 50% full? Release swapcache more aggressively.. */
 static inline bool vm_swap_full(void)
@@ -410,6 +411,9 @@ struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
 extern void exit_swap_address_space(unsigned int type);
 
+extern int get_swap_slots(int n, swp_entry_t *slots);
+extern void swapcache_free_batch(swp_entry_t *entries, int n);
+
 #else /* CONFIG_SWAP */
 
 #define swap_address_space(entry)		(NULL)
diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
new file mode 100644
index 000000000000..ba5623b27c60
--- /dev/null
+++ b/include/linux/swap_slots.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_SWAP_SLOTS_H
+#define _LINUX_SWAP_SLOTS_H
+
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+
+#define SWAP_SLOTS_CACHE_SIZE			SWAP_BATCH
+#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE	(5*SWAP_SLOTS_CACHE_SIZE)
+#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE	(2*SWAP_SLOTS_CACHE_SIZE)
+
+struct swap_slots_cache {
+	bool		lock_initialized;
+	struct mutex	alloc_lock; /* protects slots, nr, cur */
+	swp_entry_t	*slots;
+	int		nr;
+	int		cur;
+	spinlock_t	free_lock;  /* protects slots_ret, n_ret */
+	swp_entry_t	*slots_ret;
+	int		n_ret;
+};
+
+void disable_swap_slots_cache_lock(void);
+void reenable_swap_slots_cache_unlock(void);
+int enable_swap_slots_cache(void);
+int free_swap_slot(swp_entry_t entry);
+
+#endif /* _LINUX_SWAP_SLOTS_H */
diff --git a/mm/Makefile b/mm/Makefile
index 295bd7a9f76b..433eaf9a876e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -35,7 +35,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o vmacache.o \
+			   compaction.o vmacache.o swap_slots.o \
 			   interval_tree.o list_lru.o workingset.o \
 			   debug.o $(mmu-y)
 
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
new file mode 100644
index 000000000000..ebf4f1cbac04
--- /dev/null
+++ b/mm/swap_slots.c
@@ -0,0 +1,342 @@
+/*
+ * Manage cache of swap slots to be used for and returned from
+ * swap.
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * We allocate the swap slots from the global pool and put
+ * them into local per cpu caches.  This has the advantage
+ * of not needing to acquire the swap_info lock every time
+ * we need a new slot.
+ *
+ * There is also opportunity to simply return the slot
+ * to local caches without needing to acquire swap_info
+ * lock.  We do not reuse the returned slots directly but
+ * move them back to the global pool in a batch.  This
+ * allows the slots to coalesce and reduce fragmentation.
+ *
+ * The swap entry allocated is marked with SWAP_HAS_CACHE
+ * flag in map_count that prevents it from being allocated
+ * again from the global pool.
+ *
+ * The swap slots cache is protected by a mutex instead of
+ * a spin lock as when we search for slots with scan_swap_map,
+ * we can possibly sleep.
+ */
+
+#include <linux/swap_slots.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_SWAP
+
+static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
+static bool	swap_slot_cache_active;
+static bool	swap_slot_cache_enabled;
+static bool	swap_slot_cache_initialized;
+DEFINE_MUTEX(swap_slots_cache_mutex);
+/* Serialize swap slots cache enable/disable operations */
+DEFINE_MUTEX(swap_slots_cache_enable_mutex);
+
+static void __drain_swap_slots_cache(unsigned int type);
+static void deactivate_swap_slots_cache(void);
+static void reactivate_swap_slots_cache(void);
+
+#define use_swap_slot_cache (swap_slot_cache_active && \
+		swap_slot_cache_enabled && swap_slot_cache_initialized)
+#define SLOTS_CACHE 0x1
+#define SLOTS_CACHE_RET 0x2
+
+static void deactivate_swap_slots_cache(void)
+{
+	mutex_lock(&swap_slots_cache_mutex);
+	swap_slot_cache_active = false;
+	__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
+	mutex_unlock(&swap_slots_cache_mutex);
+}
+
+static void reactivate_swap_slots_cache(void)
+{
+	mutex_lock(&swap_slots_cache_mutex);
+	swap_slot_cache_active = true;
+	mutex_unlock(&swap_slots_cache_mutex);
+}
+
+/* Must not be called with cpu hot plug lock */
+void disable_swap_slots_cache_lock(void)
+{
+	mutex_lock(&swap_slots_cache_enable_mutex);
+	swap_slot_cache_enabled = false;
+	if (swap_slot_cache_initialized) {
+		/* serialize with cpu hotplug operations */
+		get_online_cpus();
+		__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
+		put_online_cpus();
+	}
+}
+
+static void __reenable_swap_slots_cache(void)
+{
+	swap_slot_cache_enabled = has_usable_swap();
+}
+
+void reenable_swap_slots_cache_unlock(void)
+{
+	__reenable_swap_slots_cache();
+	mutex_unlock(&swap_slots_cache_enable_mutex);
+}
+
+static bool check_cache_active(void)
+{
+	long pages;
+
+	if (!swap_slot_cache_enabled || !swap_slot_cache_initialized)
+		return false;
+
+	pages = get_nr_swap_pages();
+	if (!swap_slot_cache_active) {
+		if (pages > num_online_cpus() *
+		    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
+			reactivate_swap_slots_cache();
+		goto out;
+	}
+
+	/* if global pool of slot caches too low, deactivate cache */
+	if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
+		deactivate_swap_slots_cache();
+out:
+	return swap_slot_cache_active;
+}
+
+static int alloc_swap_slot_cache(unsigned int cpu)
+{
+	struct swap_slots_cache *cache;
+	swp_entry_t *slots, *slots_ret;
+
+	/*
+	 * Do allocation outside swap_slots_cache_mutex
+	 * as vzalloc could trigger reclaim and get_swap_page,
+	 * which can lock swap_slots_cache_mutex.
+	 */
+	slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+	if (!slots)
+		return -ENOMEM;
+
+	slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+	if (!slots_ret) {
+		vfree(slots);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&swap_slots_cache_mutex);
+	cache = &per_cpu(swp_slots, cpu);
+	if (cache->slots || cache->slots_ret)
+		/* cache already allocated */
+		goto out;
+	if (!cache->lock_initialized) {
+		mutex_init(&cache->alloc_lock);
+		spin_lock_init(&cache->free_lock);
+		cache->lock_initialized = true;
+	}
+	cache->nr = 0;
+	cache->cur = 0;
+	cache->n_ret = 0;
+	cache->slots = slots;
+	slots = NULL;
+	cache->slots_ret = slots_ret;
+	slots_ret = NULL;
+out:
+	mutex_unlock(&swap_slots_cache_mutex);
+	if (slots)
+		vfree(slots);
+	if (slots_ret)
+		vfree(slots_ret);
+	return 0;
+}
+
+static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
+				  bool free_slots)
+{
+	struct swap_slots_cache *cache;
+	swp_entry_t *slots = NULL;
+
+	cache = &per_cpu(swp_slots, cpu);
+	if ((type & SLOTS_CACHE) && cache->slots) {
+		mutex_lock(&cache->alloc_lock);
+		swapcache_free_entries(cache->slots + cache->cur, cache->nr);
+		cache->cur = 0;
+		cache->nr = 0;
+		if (free_slots && cache->slots) {
+			vfree(cache->slots);
+			cache->slots = NULL;
+		}
+		mutex_unlock(&cache->alloc_lock);
+	}
+	if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
+		spin_lock_irq(&cache->free_lock);
+		swapcache_free_entries(cache->slots_ret, cache->n_ret);
+		cache->n_ret = 0;
+		if (free_slots && cache->slots_ret) {
+			slots = cache->slots_ret;
+			cache->slots_ret = NULL;
+		}
+		spin_unlock_irq(&cache->free_lock);
+		if (slots)
+			vfree(slots);
+	}
+}
+
+static void __drain_swap_slots_cache(unsigned int type)
+{
+	unsigned int cpu;
+
+	/*
+	 * This function is called during
+	 *	1) swapoff, when we have to make sure no
+	 *	   left over slots are in cache when we remove
+	 *	   a swap device;
+	 *	2) disabling of swap slot cache, when we run low
+	 *	   on swap slots when allocating memory and need
+	 *	   to return swap slots to global pool.
+	 *
+	 * We cannot acquire cpu hot plug lock here as
+	 * this function can be invoked in the cpu
+	 * hot plug path:
+	 * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
+	 *   -> memory allocation -> direct reclaim -> get_swap_page
+	 *   -> drain_swap_slots_cache
+	 *
+	 * Hence the loop over current online cpu below could miss cpu that
+	 * is being brought online but not yet marked as online.
+	 * That is okay as we do not schedule and run anything on a
+	 * cpu before it has been marked online. Hence, we will not
+	 * fill any swap slots in slots cache of such cpu.
+	 * There are no slots on such cpu that need to be drained.
+	 */
+	for_each_online_cpu(cpu)
+		drain_slots_cache_cpu(cpu, type, false);
+}
+
+static int free_slot_cache(unsigned int cpu)
+{
+	mutex_lock(&swap_slots_cache_mutex);
+	drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
+	mutex_unlock(&swap_slots_cache_mutex);
+	return 0;
+}
+
+int enable_swap_slots_cache(void)
+{
+	int ret = 0;
+
+	mutex_lock(&swap_slots_cache_enable_mutex);
+	if (swap_slot_cache_initialized) {
+		__reenable_swap_slots_cache();
+		goto out_unlock;
+	}
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
+				alloc_swap_slot_cache, free_slot_cache);
+	if (ret < 0)
+		goto out_unlock;
+	swap_slot_cache_initialized = true;
+	__reenable_swap_slots_cache();
+out_unlock:
+	mutex_unlock(&swap_slots_cache_enable_mutex);
+	return 0;
+}
+
+/* called with swap slot cache's alloc lock held */
+static int refill_swap_slots_cache(struct swap_slots_cache *cache)
+{
+	if (!use_swap_slot_cache || cache->nr)
+		return 0;
+
+	cache->cur = 0;
+	if (swap_slot_cache_active)
+		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots);
+
+	return cache->nr;
+}
+
+int free_swap_slot(swp_entry_t entry)
+{
+	struct swap_slots_cache *cache;
+
+	BUG_ON(!swap_slot_cache_initialized);
+
+	cache = &get_cpu_var(swp_slots);
+	if (use_swap_slot_cache && cache->slots_ret) {
+		spin_lock_irq(&cache->free_lock);
+		/* Swap slots cache may be deactivated before acquiring lock */
+		if (!use_swap_slot_cache) {
+			spin_unlock_irq(&cache->free_lock);
+			goto direct_free;
+		}
+		if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
+			/*
+			 * Return slots to global pool.
+			 * The current swap_map value is SWAP_HAS_CACHE.
+			 * Set it to 0 to indicate it is available for
+			 * allocation in global pool
+			 */
+			swapcache_free_entries(cache->slots_ret, cache->n_ret);
+			cache->n_ret = 0;
+		}
+		cache->slots_ret[cache->n_ret++] = entry;
+		spin_unlock_irq(&cache->free_lock);
+	} else {
+direct_free:
+		swapcache_free_entries(&entry, 1);
+	}
+	put_cpu_var(swp_slots);
+
+	return 0;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t entry, *pentry;
+	struct swap_slots_cache *cache;
+
+	/*
+	 * Preemption is allowed here, because we may sleep
+	 * in refill_swap_slots_cache().  But it is safe, because
+	 * accesses to the per-CPU data structure are protected by the
+	 * mutex cache->alloc_lock.
+	 *
+	 * The alloc path here does not touch cache->slots_ret
+	 * so cache->free_lock is not taken.
+	 */
+	cache = raw_cpu_ptr(&swp_slots);
+
+	entry.val = 0;
+	if (check_cache_active()) {
+		mutex_lock(&cache->alloc_lock);
+		if (cache->slots) {
+repeat:
+			if (cache->nr) {
+				pentry = &cache->slots[cache->cur++];
+				entry = *pentry;
+				pentry->val = 0;
+				cache->nr--;
+			} else {
+				if (refill_swap_slots_cache(cache))
+					goto repeat;
+			}
+		}
+		mutex_unlock(&cache->alloc_lock);
+		if (entry.val)
+			return entry;
+	}
+
+	get_swap_pages(1, &entry);
+
+	return entry;
+}
+
+#endif /* CONFIG_SWAP */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3d76d80c07d6..e1f07cafecaa 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -18,6 +18,7 @@
 #include <linux/pagevec.h>
 #include <linux/migrate.h>
 #include <linux/vmalloc.h>
+#include <linux/swap_slots.h>
 
 #include <asm/pgtable.h>
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8b5bd34b1a00..30a90fd140b7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -34,6 +34,7 @@
 #include <linux/frontswap.h>
 #include <linux/swapfile.h>
 #include <linux/export.h>
+#include <linux/swap_slots.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -854,14 +855,6 @@ noswap:
 	return n_ret;
 }
 
-swp_entry_t get_swap_page(void)
-{
-	swp_entry_t entry;
-
-	get_swap_pages(1, &entry);
-	return entry;
-}
-
 /* The only caller of this function is now suspend routine */
 swp_entry_t get_swap_page_of_type(int type)
 {
@@ -1052,7 +1045,7 @@ void swap_free(swp_entry_t entry)
 	p = _swap_info_get(entry);
 	if (p) {
 		if (!__swap_entry_free(p, entry, 1))
-			swapcache_free_entries(&entry, 1);
+			free_swap_slot(entry);
 	}
 }
 
@@ -1066,7 +1059,7 @@ void swapcache_free(swp_entry_t entry)
 	p = _swap_info_get(entry);
 	if (p) {
 		if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
-			swapcache_free_entries(&entry, 1);
+			free_swap_slot(entry);
 	}
 }
 
@@ -1288,7 +1281,7 @@ int free_swap_and_cache(swp_entry_t entry)
 				page = NULL;
 			}
 		} else if (!count)
-			swapcache_free_entries(&entry, 1);
+			free_swap_slot(entry);
 	}
 	if (page) {
 		/*
@@ -2116,6 +2109,17 @@ static void reinsert_swap_info(struct swap_info_struct *p)
 	spin_unlock(&swap_lock);
 }
 
+bool has_usable_swap(void)
+{
+	bool ret = true;
+
+	spin_lock(&swap_lock);
+	if (plist_head_empty(&swap_active_head))
+		ret = false;
+	spin_unlock(&swap_lock);
+	return ret;
+}
+
 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
 	struct swap_info_struct *p = NULL;
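
The return side described in the commit message is symmetric to the alloc-side sketch shown before the patch. Below is a similarly hedged user-space C sketch (again illustrative, not the kernel code) of how free_swap_slot() batches freed slots: they are stashed in a per-CPU array under a spin lock and flushed to the global pool in one batch when the array fills, giving neighbouring slots a chance to be coalesced there; global_pool_put() and CACHE_SIZE are hypothetical stand-ins for swapcache_free_entries() and SWAP_SLOTS_CACHE_SIZE.

#include <pthread.h>
#include <stdio.h>

#define CACHE_SIZE 64

struct slot_ret_cache {
	pthread_spinlock_t free_lock;            /* protects slots_ret, n_ret */
	unsigned long      slots_ret[CACHE_SIZE];
	int                n_ret;
};

/* Hypothetical stand-in for returning a batch of slots to the global pool. */
static void global_pool_put(unsigned long *slots, int n)
{
	(void)slots;
	printf("returning %d slots to the global pool in one batch\n", n);
}

static void cache_free_slot(struct slot_ret_cache *c, unsigned long slot)
{
	pthread_spin_lock(&c->free_lock);
	if (c->n_ret >= CACHE_SIZE) {            /* array full: flush the batch */
		global_pool_put(c->slots_ret, c->n_ret);
		c->n_ret = 0;
	}
	c->slots_ret[c->n_ret++] = slot;         /* stash; never reused directly */
	pthread_spin_unlock(&c->free_lock);
}

int main(void)
{
	struct slot_ret_cache c = { .n_ret = 0 };

	pthread_spin_init(&c.free_lock, PTHREAD_PROCESS_PRIVATE);
	for (unsigned long s = 1; s <= 2 * CACHE_SIZE; s++)
		cache_free_slot(&c, s);          /* flushes once, on slot 65 */
	pthread_spin_destroy(&c.free_lock);
	return 0;
}
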