diff options
author | Alex Bligh <alex@alex.org.uk> | 2013-02-27 20:05:23 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-27 22:10:22 -0500 |
commit | 75f187aba5e7a3eea259041f85099029774a4c5b (patch) | |
tree | 34a26995689413e123463300447f2e0fb7b05673 /drivers/block/nbd.c | |
parent | cd89f46b52cd2354d3d322ea7eab193b86ba03c6 (diff) |
nbd: support FLUSH requests
Currently, the NBD device does not accept flush requests from the Linux
block layer. If the NBD server opened the target with neither O_SYNC nor
O_DSYNC, however, the device will be effectively backed by a writeback
cache. Without issuing flushes properly, operation of the NBD device will
not be safe against power losses.
The NBD protocol has support for both a cache flush command and a FUA
command flag; the server will also pass a flag to note its support for
these features. This patch adds support for the cache flush command and
flag. In the kernel, we receive the flags via the NBD_SET_FLAGS ioctl,
and map NBD_FLAG_SEND_FLUSH to the argument of blk_queue_flush. When the
flag is active the block layer will send REQ_FLUSH requests, which we
translate to NBD_CMD_FLUSH commands.
FUA support is not included in this patch because all free software
servers implement it with a full fdatasync; thus it has no advantage over
supporting flush only. Because I [Paolo] cannot really benchmark it in a
realistic scenario, I cannot tell if it is a good idea or not. It is also
not clear if it is valid for an NBD server to support FUA but not flush.
The Linux block layer gives a warning for this combination; the NBD
protocol documentation says nothing about it.
The patch also fixes a small problem in the handling of flags: nbd->flags
must be cleared at the end of NBD_DO_IT, but the driver was not doing
that. The bug manifests itself as follows. Suppose you use two different
client/server pairs to start the NBD device. Suppose also that the first
client supports NBD_SET_FLAGS, and the first server sends
NBD_FLAG_SEND_FLUSH; the second pair instead does neither of these two
things. Before this patch, the second invocation of NBD_DO_IT will use a
stale value of nbd->flags, and the second server will issue an error every
time it receives an NBD_CMD_FLUSH command.
This bug is pre-existing, but it becomes much more important after this
patch; flush failures make the device pretty much unusable, unlike
discard failures.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alex Bligh <alex@alex.org.uk>
Acked-by: Paul Clements <Paul.Clements@steeleye.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r-- | drivers/block/nbd.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146bf65e5..695c68fedd32 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c | |||
@@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) | |||
98 | case NBD_CMD_READ: return "read"; | 98 | case NBD_CMD_READ: return "read"; |
99 | case NBD_CMD_WRITE: return "write"; | 99 | case NBD_CMD_WRITE: return "write"; |
100 | case NBD_CMD_DISC: return "disconnect"; | 100 | case NBD_CMD_DISC: return "disconnect"; |
101 | case NBD_CMD_FLUSH: return "flush"; | ||
101 | case NBD_CMD_TRIM: return "trim/discard"; | 102 | case NBD_CMD_TRIM: return "trim/discard"; |
102 | } | 103 | } |
103 | return "invalid"; | 104 | return "invalid"; |
@@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) | |||
244 | 245 | ||
245 | request.magic = htonl(NBD_REQUEST_MAGIC); | 246 | request.magic = htonl(NBD_REQUEST_MAGIC); |
246 | request.type = htonl(nbd_cmd(req)); | 247 | request.type = htonl(nbd_cmd(req)); |
247 | request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); | 248 | |
248 | request.len = htonl(size); | 249 | if (nbd_cmd(req) == NBD_CMD_FLUSH) { |
250 | /* Other values are reserved for FLUSH requests. */ | ||
251 | request.from = 0; | ||
252 | request.len = 0; | ||
253 | } else { | ||
254 | request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); | ||
255 | request.len = htonl(size); | ||
256 | } | ||
249 | memcpy(request.handle, &req, sizeof(req)); | 257 | memcpy(request.handle, &req, sizeof(req)); |
250 | 258 | ||
251 | dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", | 259 | dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", |
@@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) | |||
482 | } | 490 | } |
483 | } | 491 | } |
484 | 492 | ||
493 | if (req->cmd_flags & REQ_FLUSH) { | ||
494 | BUG_ON(unlikely(blk_rq_sectors(req))); | ||
495 | nbd_cmd(req) = NBD_CMD_FLUSH; | ||
496 | } | ||
497 | |||
485 | req->errors = 0; | 498 | req->errors = 0; |
486 | 499 | ||
487 | mutex_lock(&nbd->tx_lock); | 500 | mutex_lock(&nbd->tx_lock); |
@@ -684,6 +697,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
684 | if (nbd->flags & NBD_FLAG_SEND_TRIM) | 697 | if (nbd->flags & NBD_FLAG_SEND_TRIM) |
685 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, | 698 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, |
686 | nbd->disk->queue); | 699 | nbd->disk->queue); |
700 | if (nbd->flags & NBD_FLAG_SEND_FLUSH) | ||
701 | blk_queue_flush(nbd->disk->queue, REQ_FLUSH); | ||
702 | else | ||
703 | blk_queue_flush(nbd->disk->queue, 0); | ||
687 | 704 | ||
688 | thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); | 705 | thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); |
689 | if (IS_ERR(thread)) { | 706 | if (IS_ERR(thread)) { |
@@ -705,6 +722,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
705 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); | 722 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); |
706 | if (file) | 723 | if (file) |
707 | fput(file); | 724 | fput(file); |
725 | nbd->flags = 0; | ||
708 | nbd->bytesize = 0; | 726 | nbd->bytesize = 0; |
709 | bdev->bd_inode->i_size = 0; | 727 | bdev->bd_inode->i_size = 0; |
710 | set_capacity(nbd->disk, 0); | 728 | set_capacity(nbd->disk, 0); |