path: root/include/linux
author		Shaohua Li <shli@kernel.org>	2013-09-11 17:20:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-11 18:57:15 -0400
commit		815c2c543d3aeb914a361f981440ece552778724 (patch)
tree		7d6f0de8493abbb08f0a42cb565087868b9eaeb4 /include/linux
parent		2a8f9449343260373398d59228a62a4332ea513a (diff)
swap: make swap discard async
swap can do cluster discard for SSD, which is good, but there are some problems here:

1. swap does the discard just before page reclaim gets a swap entry and writes the disk sectors. This is useless for high-end SSDs, because an overwrite of a sector implies a discard of the original sector too. A discard + overwrite == overwrite.

2. the purpose of doing discard is to improve SSD firmware garbage collection. Ideally we should send discard as early as possible, so the firmware can do something smart. Sending discard just after a swap entry is freed is considered early compared to sending discard before the write. Of course, if the workload is already bound to gc speed, sending discard earlier or later doesn't make much difference.

3. block discard is a sync API, which will delay scan_swap_map() significantly.

4. Write and discard commands can be executed in parallel in a PCIe SSD. Making swap discard async can make execution more efficient.

This patch makes swap discard async and moves discard to where the swap entry is freed. Discard and write have no dependency now, so the above issues can be avoided. Ideally we should do discard for any freed sectors, but on some SSDs discard is very slow. This patch still does discard for a whole cluster.

My test does several rounds of 'mmap, write, unmap', which triggers a lot of swap discard. On a fusionio card, with this patch, the test runtime is reduced to 18% of the time without it, so around 5.5x faster.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Kyungmin Park <kmpark@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rafael Aquini <aquini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
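A minimal sketch of the deferral pattern the message describes: the synchronous blkdev_issue_discard() call is pushed off to a workqueue so the swap allocation path is not delayed. This is an illustration only, assuming the v3.11-era blkdev_issue_discard() signature; all demo_* names are invented for the example, and the actual implementation lives in mm/swapfile.c.

/* Sketch: defer the synchronous discard to a workqueue so the
 * allocation path (scan_swap_map() in the real code) is not delayed.
 * discard_work is set up once with
 * INIT_WORK(&dev->discard_work, demo_discard_fn) at device setup.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/blkdev.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

struct demo_swap_dev {
	struct block_device *bdev;
	spinlock_t lock;
	sector_t pending_start;		/* freed range awaiting discard */
	sector_t pending_len;
	struct work_struct discard_work;
};

/* Runs later in process context from the system workqueue. */
static void demo_discard_fn(struct work_struct *work)
{
	struct demo_swap_dev *dev =
		container_of(work, struct demo_swap_dev, discard_work);
	sector_t start, len;

	spin_lock(&dev->lock);
	start = dev->pending_start;
	len = dev->pending_len;
	dev->pending_len = 0;
	spin_unlock(&dev->lock);

	if (len)
		blkdev_issue_discard(dev->bdev, start, len, GFP_KERNEL, 0);
}

/* Called where the swap cluster is freed: record the range and let
 * the worker issue the discard asynchronously. */
static void demo_free_cluster(struct demo_swap_dev *dev,
			      sector_t start, sector_t len)
{
	spin_lock(&dev->lock);
	dev->pending_start = start;
	dev->pending_len = len;
	spin_unlock(&dev->lock);
	schedule_work(&dev->discard_work);
}

The sketch tracks a single pending range for brevity; the patch itself keeps a list of whole clusters (the new discard_cluster_head/discard_cluster_tail fields in the diff below), but the deferral mechanism is the same.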
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/swap.h	20
1 file changed, 11 insertions, 9 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cb5baebf31d6..8a3c4a1caa14 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -217,8 +217,6 @@ struct swap_info_struct {
 	unsigned int inuse_pages;	/* number of those currently in use */
 	unsigned int cluster_next;	/* likely index for next allocation */
 	unsigned int cluster_nr;	/* countdown to next cluster search */
-	unsigned int lowest_alloc;	/* while preparing discard cluster */
-	unsigned int highest_alloc;	/* while preparing discard cluster */
 	struct swap_extent *curr_swap_extent;
 	struct swap_extent first_swap_extent;
 	struct block_device *bdev;	/* swap device or bdev of swap file */
@@ -232,14 +230,18 @@ struct swap_info_struct {
 					 * protect map scan related fields like
 					 * swap_map, lowest_bit, highest_bit,
 					 * inuse_pages, cluster_next,
-					 * cluster_nr, lowest_alloc and
-					 * highest_alloc. other fields are only
-					 * changed at swapon/swapoff, so are
-					 * protected by swap_lock. changing
-					 * flags need hold this lock and
-					 * swap_lock. If both locks need hold,
-					 * hold swap_lock first.
+					 * cluster_nr, lowest_alloc,
+					 * highest_alloc, free/discard cluster
+					 * list. other fields are only changed
+					 * at swapon/swapoff, so are protected
+					 * by swap_lock. changing flags need
+					 * hold this lock and swap_lock. If
+					 * both locks need hold, hold swap_lock
+					 * first.
 					 */
+	struct work_struct discard_work; /* discard worker */
+	struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
+	struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
 };
 
 struct swap_list_t {
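As a rough illustration of how the new head/tail pair can form a discard queue: freed clusters are chained into a FIFO by index into the per-device cluster array, and discard_work drains it one cluster at a time. The demo_* names and field layout below are assumptions made for this sketch; the real encoding uses struct swap_cluster_info and its helpers in mm/swapfile.c.

/* Illustrative only: an index-linked FIFO of clusters pending discard,
 * mirroring the role of discard_cluster_head/discard_cluster_tail.
 */
#define DEMO_NULL_CLUSTER	(~0u)

struct demo_cluster {
	unsigned int next;	/* index of the next cluster in the list */
};

struct demo_swap_info {
	struct demo_cluster *cluster_info;	/* one entry per cluster */
	unsigned int discard_head;		/* first cluster to discard */
	unsigned int discard_tail;		/* last cluster to discard */
};

/* Append a freed cluster; called with the device lock held. */
static void demo_queue_discard(struct demo_swap_info *si, unsigned int idx)
{
	si->cluster_info[idx].next = DEMO_NULL_CLUSTER;
	if (si->discard_head == DEMO_NULL_CLUSTER)
		si->discard_head = idx;
	else
		si->cluster_info[si->discard_tail].next = idx;
	si->discard_tail = idx;
}

/* Pop the oldest queued cluster; the discard worker calls this until
 * the list is empty, issuing one discard per cluster. */
static unsigned int demo_pop_discard(struct demo_swap_info *si)
{
	unsigned int idx = si->discard_head;

	if (idx != DEMO_NULL_CLUSTER) {
		si->discard_head = si->cluster_info[idx].next;
		if (si->discard_head == DEMO_NULL_CLUSTER)
			si->discard_tail = DEMO_NULL_CLUSTER;
	}
	return idx;
}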