aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2015-02-13 08:27:41 -0500
committerMike Snitzer <snitzer@redhat.com>2015-02-16 11:11:15 -0500
commitb3c5fd3052492f1b8d060799d4f18be5a5438add (patch)
tree09ea4e16adffb5eafc7f93c938c64b9a5f6841a1 /drivers/md
parent0f5d8e6ee758f7023e4353cca75d785b2d4f6abe (diff)
dm crypt: sort writes
Write requests are sorted in a red-black tree structure and are submitted in the sorted order. In theory the sorting should be performed by the underlying disk scheduler, however, in practice the disk scheduler only accepts and sorts a finite number of requests. To allow the sorting of all requests, dm-crypt needs to implement its own sorting. The overhead associated with rbtree-based sorting is considered negligible so it is not used conditionally. Even on SSD sorting can be beneficial since in-order request dispatch promotes lower latency IO completion to the upper layers. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-crypt.c51
1 file changed, 36 insertions, 15 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4519a7c0098c..713a96237a80 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -22,6 +22,7 @@
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/atomic.h> 23#include <linux/atomic.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <linux/rbtree.h>
25#include <asm/page.h> 26#include <asm/page.h>
26#include <asm/unaligned.h> 27#include <asm/unaligned.h>
27#include <crypto/hash.h> 28#include <crypto/hash.h>
@@ -60,7 +61,7 @@ struct dm_crypt_io {
60 int error; 61 int error;
61 sector_t sector; 62 sector_t sector;
62 63
63 struct list_head list; 64 struct rb_node rb_node;
64} CRYPTO_MINALIGN_ATTR; 65} CRYPTO_MINALIGN_ATTR;
65 66
66struct dm_crypt_request { 67struct dm_crypt_request {
@@ -134,7 +135,7 @@ struct crypt_config {
134 135
135 struct task_struct *write_thread; 136 struct task_struct *write_thread;
136 wait_queue_head_t write_thread_wait; 137 wait_queue_head_t write_thread_wait;
137 struct list_head write_thread_list; 138 struct rb_root write_tree;
138 139
139 char *cipher; 140 char *cipher;
140 char *cipher_string; 141 char *cipher_string;
@@ -1169,11 +1170,15 @@ static void kcryptd_io_write(struct dm_crypt_io *io)
1169 generic_make_request(clone); 1170 generic_make_request(clone);
1170} 1171}
1171 1172
1173#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
1174
1172static int dmcrypt_write(void *data) 1175static int dmcrypt_write(void *data)
1173{ 1176{
1174 struct crypt_config *cc = data; 1177 struct crypt_config *cc = data;
1178 struct dm_crypt_io *io;
1179
1175 while (1) { 1180 while (1) {
1176 struct list_head local_list; 1181 struct rb_root write_tree;
1177 struct blk_plug plug; 1182 struct blk_plug plug;
1178 1183
1179 DECLARE_WAITQUEUE(wait, current); 1184 DECLARE_WAITQUEUE(wait, current);
@@ -1181,7 +1186,7 @@ static int dmcrypt_write(void *data)
1181 spin_lock_irq(&cc->write_thread_wait.lock); 1186 spin_lock_irq(&cc->write_thread_wait.lock);
1182continue_locked: 1187continue_locked:
1183 1188
1184 if (!list_empty(&cc->write_thread_list)) 1189 if (!RB_EMPTY_ROOT(&cc->write_tree))
1185 goto pop_from_list; 1190 goto pop_from_list;
1186 1191
1187 __set_current_state(TASK_INTERRUPTIBLE); 1192 __set_current_state(TASK_INTERRUPTIBLE);
@@ -1203,20 +1208,22 @@ continue_locked:
1203 goto continue_locked; 1208 goto continue_locked;
1204 1209
1205pop_from_list: 1210pop_from_list:
1206 local_list = cc->write_thread_list; 1211 write_tree = cc->write_tree;
1207 local_list.next->prev = &local_list; 1212 cc->write_tree = RB_ROOT;
1208 local_list.prev->next = &local_list;
1209 INIT_LIST_HEAD(&cc->write_thread_list);
1210
1211 spin_unlock_irq(&cc->write_thread_wait.lock); 1213 spin_unlock_irq(&cc->write_thread_wait.lock);
1212 1214
1215 BUG_ON(rb_parent(write_tree.rb_node));
1216
1217 /*
1218 * Note: we cannot walk the tree here with rb_next because
1219 * the structures may be freed when kcryptd_io_write is called.
1220 */
1213 blk_start_plug(&plug); 1221 blk_start_plug(&plug);
1214 do { 1222 do {
1215 struct dm_crypt_io *io = container_of(local_list.next, 1223 io = crypt_io_from_node(rb_first(&write_tree));
1216 struct dm_crypt_io, list); 1224 rb_erase(&io->rb_node, &write_tree);
1217 list_del(&io->list);
1218 kcryptd_io_write(io); 1225 kcryptd_io_write(io);
1219 } while (!list_empty(&local_list)); 1226 } while (!RB_EMPTY_ROOT(&write_tree));
1220 blk_finish_plug(&plug); 1227 blk_finish_plug(&plug);
1221 } 1228 }
1222 return 0; 1229 return 0;
@@ -1227,6 +1234,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1227 struct bio *clone = io->ctx.bio_out; 1234 struct bio *clone = io->ctx.bio_out;
1228 struct crypt_config *cc = io->cc; 1235 struct crypt_config *cc = io->cc;
1229 unsigned long flags; 1236 unsigned long flags;
1237 sector_t sector;
1238 struct rb_node **rbp, *parent;
1230 1239
1231 if (unlikely(io->error < 0)) { 1240 if (unlikely(io->error < 0)) {
1232 crypt_free_buffer_pages(cc, clone); 1241 crypt_free_buffer_pages(cc, clone);
@@ -1246,7 +1255,19 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1246 } 1255 }
1247 1256
1248 spin_lock_irqsave(&cc->write_thread_wait.lock, flags); 1257 spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
1249 list_add_tail(&io->list, &cc->write_thread_list); 1258 rbp = &cc->write_tree.rb_node;
1259 parent = NULL;
1260 sector = io->sector;
1261 while (*rbp) {
1262 parent = *rbp;
1263 if (sector < crypt_io_from_node(parent)->sector)
1264 rbp = &(*rbp)->rb_left;
1265 else
1266 rbp = &(*rbp)->rb_right;
1267 }
1268 rb_link_node(&io->rb_node, parent, rbp);
1269 rb_insert_color(&io->rb_node, &cc->write_tree);
1270
1250 wake_up_locked(&cc->write_thread_wait); 1271 wake_up_locked(&cc->write_thread_wait);
1251 spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); 1272 spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
1252} 1273}
@@ -1836,7 +1857,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1836 } 1857 }
1837 1858
1838 init_waitqueue_head(&cc->write_thread_wait); 1859 init_waitqueue_head(&cc->write_thread_wait);
1839 INIT_LIST_HEAD(&cc->write_thread_list); 1860 cc->write_tree = RB_ROOT;
1840 1861
1841 cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); 1862 cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
1842 if (IS_ERR(cc->write_thread)) { 1863 if (IS_ERR(cc->write_thread)) {