aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/swap.h
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2013-09-11 17:20:28 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-11 18:57:15 -0400
commit2a8f9449343260373398d59228a62a4332ea513a (patch)
tree76c6ddf2a99d9dc7519585ba65c9883005908286 /include/linux/swap.h
parent15ca220e1a63af06e000691e4ae1beaba5430c32 (diff)
swap: change block allocation algorithm for SSD
I'm using a fast SSD to do swap. scan_swap_map() sometimes uses up to 20~30% CPU time (when cluster is hard to find, the CPU time can be up to 80%), which becomes a bottleneck. scan_swap_map() scans a byte array to search a 256 page cluster, which is very slow. Here I introduced a simple algorithm to search cluster. Since we only care about 256 pages cluster, we can just use a counter to track if a cluster is free. Every 256 pages use one int to store the counter. If the counter of a cluster is 0, the cluster is free. All free clusters will be added to a list, so searching cluster is very efficient. With this, scap_swap_map() overhead disappears. This might help low end SD card swap too. Because if the cluster is aligned, SD firmware can do flash erase more efficiently. We only enable the algorithm for SSD. Hard disk swap isn't fast enough and has downside with the algorithm which might introduce regression (see below). The patch slightly changes which cluster is choosen. It always adds free cluster to list tail. This can help wear leveling for low end SSD too. And if no cluster found, the scan_swap_map() will do search from the end of last cluster. So if no cluster found, the scan_swap_map() will do search from the end of last free cluster, which is random. For SSD, this isn't a problem at all. Another downside is the cluster must be aligned to 256 pages, which will reduce the chance to find a cluster. I would expect this isn't a big problem for SSD because of the non-seek penality. (And this is the reason I only enable the algorithm for SSD). Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Kyungmin Park <kmpark@infradead.org> Cc: Hugh Dickins <hughd@google.com> Cc: Rafael Aquini <aquini@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/swap.h')
-rw-r--r--include/linux/swap.h20
1 files changed, 20 insertions, 0 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d95cde5e257d..cb5baebf31d6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -182,6 +182,23 @@ enum {
182#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */ 182#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */
183 183
184/* 184/*
185 * We use this to track usage of a cluster. A cluster is a block of swap disk
186 * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
187 * free clusters are organized into a list. We fetch an entry from the list to
188 * get a free cluster.
189 *
190 * The data field stores next cluster if the cluster is free or cluster usage
191 * counter otherwise. The flags field determines if a cluster is free. This is
192 * protected by swap_info_struct.lock.
193 */
194struct swap_cluster_info {
195 unsigned int data:24;
196 unsigned int flags:8;
197};
198#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
199#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
200
201/*
185 * The in-memory structure used to track swap areas. 202 * The in-memory structure used to track swap areas.
186 */ 203 */
187struct swap_info_struct { 204struct swap_info_struct {
@@ -191,6 +208,9 @@ struct swap_info_struct {
191 signed char next; /* next type on the swap list */ 208 signed char next; /* next type on the swap list */
192 unsigned int max; /* extent of the swap_map */ 209 unsigned int max; /* extent of the swap_map */
193 unsigned char *swap_map; /* vmalloc'ed array of usage counts */ 210 unsigned char *swap_map; /* vmalloc'ed array of usage counts */
211 struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
212 struct swap_cluster_info free_cluster_head; /* free cluster list head */
213 struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
194 unsigned int lowest_bit; /* index of first free in swap_map */ 214 unsigned int lowest_bit; /* index of first free in swap_map */
195 unsigned int highest_bit; /* index of last free in swap_map */ 215 unsigned int highest_bit; /* index of last free in swap_map */
196 unsigned int pages; /* total of usable pages of swap */ 216 unsigned int pages; /* total of usable pages of swap */