aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Hugh Dickins <hugh.dickins@tiscali.co.uk> 2009-12-14 20:58:46 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:15 -0500
commit 570a335b8e22579e2a51a68136d2b1f907a20eec (patch)
tree c5312383e948d2e7ac60c2fa410fee98e8b38a70 /include/linux
parent 8d69aaee80c123b460918816cbfa2e83224c3646 (diff)
swap_info: swap count continuations
Swap is duplicated (reference count incremented by one) whenever the same swap page is inserted into another mm (when forking finds a swap entry in place of a pte, or when reclaim unmaps a pte to insert the swap entry).

swap_info_struct's vmalloc'ed swap_map is the array of these reference counts: but what happens when the unsigned short (or unsigned char since the preceding patch) is full? (and its high bit is kept for a cache flag)

We then lose track of it, never freeing, leaving it in use until swapoff: at which point we _hope_ that a single pass will have found all instances, assume there are no more, and will lose user data if we're wrong.

Swapping of KSM pages has not yet been enabled; but it is implemented, and makes it very easy for a user to overflow the maximum swap count: possible with ordinary process pages, but unlikely, even when pid_max has been raised from PID_MAX_DEFAULT.

This patch implements swap count continuations: when the count overflows, a continuation page is allocated and linked to the original vmalloc'ed map page, and this is used to hold the continuation counts for that entry and its neighbours. These continuation pages are seldom referenced: the common paths all work on the original swap_map, only referring to a continuation page when the low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/swap.h 22
1 files changed, 16 insertions, 6 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f733deb10748..389e7bd92cca 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -145,15 +145,18 @@ enum {
145 SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ 145 SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */
146 SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ 146 SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
147 SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ 147 SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
148 SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
148 /* add others here before... */ 149 /* add others here before... */
149 SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ 150 SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
150}; 151};
151 152
152#define SWAP_CLUSTER_MAX 32 153#define SWAP_CLUSTER_MAX 32
153 154
154#define SWAP_MAP_MAX 0x7e 155#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */
155#define SWAP_MAP_BAD 0x7f 156#define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */
156#define SWAP_HAS_CACHE 0x80 /* There is a swap cache of entry. */ 157#define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */
158#define SWAP_CONT_MAX 0x7f /* Max count, in each swap_map continuation */
159#define COUNT_CONTINUED 0x80 /* See swap_map continuation for full count */
157 160
158/* 161/*
159 * The in-memory structure used to track swap areas. 162 * The in-memory structure used to track swap areas.
@@ -311,9 +314,10 @@ extern long total_swap_pages;
311extern void si_swapinfo(struct sysinfo *); 314extern void si_swapinfo(struct sysinfo *);
312extern swp_entry_t get_swap_page(void); 315extern swp_entry_t get_swap_page(void);
313extern swp_entry_t get_swap_page_of_type(int); 316extern swp_entry_t get_swap_page_of_type(int);
314extern void swap_duplicate(swp_entry_t);
315extern int swapcache_prepare(swp_entry_t);
316extern int valid_swaphandles(swp_entry_t, unsigned long *); 317extern int valid_swaphandles(swp_entry_t, unsigned long *);
318extern int add_swap_count_continuation(swp_entry_t, gfp_t);
319extern int swap_duplicate(swp_entry_t);
320extern int swapcache_prepare(swp_entry_t);
317extern void swap_free(swp_entry_t); 321extern void swap_free(swp_entry_t);
318extern void swapcache_free(swp_entry_t, struct page *page); 322extern void swapcache_free(swp_entry_t, struct page *page);
319extern int free_swap_and_cache(swp_entry_t); 323extern int free_swap_and_cache(swp_entry_t);
@@ -385,8 +389,14 @@ static inline void show_swap_cache_info(void)
385#define free_swap_and_cache(swp) is_migration_entry(swp) 389#define free_swap_and_cache(swp) is_migration_entry(swp)
386#define swapcache_prepare(swp) is_migration_entry(swp) 390#define swapcache_prepare(swp) is_migration_entry(swp)
387 391
388static inline void swap_duplicate(swp_entry_t swp) 392static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
389{ 393{
394 return 0;
395}
396
397static inline int swap_duplicate(swp_entry_t swp)
398{
399 return 0;
390} 400}
391 401
392static inline void swap_free(swp_entry_t swp) 402static inline void swap_free(swp_entry_t swp)