-rw-r--r--	drivers/block/nbd.c | 337
 1 file changed, 129 insertions(+), 208 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a9e398019f38..15e7c6740873 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,6 +34,7 @@
 #include <linux/kthread.h>
 #include <linux/types.h>
 #include <linux/debugfs.h>
+#include <linux/blk-mq.h>
 
 #include <asm/uaccess.h>
 #include <asm/types.h>
@@ -45,12 +46,8 @@ struct nbd_device {
 	struct socket * sock;	/* If == NULL, device is not ready, yet	*/
 	int magic;
 
-	spinlock_t queue_lock;
-	struct list_head queue_head;	/* Requests waiting result */
-	struct request *active_req;
-	wait_queue_head_t active_wq;
-	struct list_head waiting_queue;	/* Requests to be sent */
-	wait_queue_head_t waiting_wq;
+	atomic_t outstanding_cmds;
+	struct blk_mq_tag_set tag_set;
 
 	struct mutex tx_lock;
 	struct gendisk *disk;
@@ -71,6 +68,11 @@ struct nbd_device {
 #endif
 };
 
+struct nbd_cmd {
+	struct nbd_device *nbd;
+	struct list_head list;
+};
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 static struct dentry *nbd_dbg_dir;
 #endif
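Note: struct nbd_cmd is never allocated by the driver itself. Because the nbd_init() hunk further down sets tag_set.cmd_size = sizeof(struct nbd_cmd), the blk-mq core reserves that much driver-private space (the "pdu") directly behind every request it pre-allocates, and the two views convert into each other with the helpers from the linux/blk-mq.h include added above. A minimal sketch of the round trip; the helper names are illustrative and not part of the patch:

static inline struct nbd_cmd *nbd_req_to_cmd(struct request *rq)
{
	return blk_mq_rq_to_pdu(rq);	/* pdu sits right behind the request */
}

static inline struct request *nbd_cmd_to_req(struct nbd_cmd *cmd)
{
	return blk_mq_rq_from_pdu(cmd);	/* inverse conversion */
}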
@@ -83,18 +85,6 @@ static unsigned int nbds_max = 16;
 static struct nbd_device *nbd_dev;
 static int max_part;
 
-/*
- * Use just one lock (or at most 1 per NIC). Two arguments for this:
- * 1. Each NIC is essentially a synchronization point for all servers
- *    accessed through that NIC so there's no need to have more locks
- *    than NICs anyway.
- * 2. More locks lead to more "Dirty cache line bouncing" which will slow
- *    down each lock to the point where they're actually slower than just
- *    a single lock.
- * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
- */
-static DEFINE_SPINLOCK(nbd_lock);
-
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -153,18 +143,17 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
 	return 0;
 }
 
-static void nbd_end_request(struct nbd_device *nbd, struct request *req)
+static void nbd_end_request(struct nbd_cmd *cmd)
 {
+	struct nbd_device *nbd = cmd->nbd;
+	struct request *req = blk_mq_rq_from_pdu(cmd);
 	int error = req->errors ? -EIO : 0;
-	struct request_queue *q = req->q;
-	unsigned long flags;
 
-	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req,
+	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
 		error ? "failed" : "done");
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request_all(req, error);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	atomic_dec(&nbd->outstanding_cmds);
+	blk_mq_complete_request(req, error);
 }
 
 /*
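Note: completion no longer touches the request_queue lock; blk_mq_complete_request() (the two-argument form of this era, taking the error code) hands the request back to the blk-mq core, and the atomic counter exists only so the timeout handler can tell whether anything is still in flight. A sketch restating the new flow with the error handling spelled out; the helper name is assumed, not from the patch:

static void nbd_complete_cmd(struct nbd_cmd *cmd, bool failed)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (failed)
		req->errors++;			/* nbd_end_request() maps this to -EIO */
	atomic_dec(&cmd->nbd->outstanding_cmds);
	blk_mq_complete_request(req, req->errors ? -EIO : 0);
}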
@@ -193,7 +182,7 @@ static void nbd_xmit_timeout(unsigned long arg)
 	struct nbd_device *nbd = (struct nbd_device *)arg;
 	unsigned long flags;
 
-	if (list_empty(&nbd->queue_head))
+	if (!atomic_read(&nbd->outstanding_cmds))
 		return;
 
 	spin_lock_irqsave(&nbd->sock_lock, flags);
@@ -273,8 +262,9 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
 }
 
 /* always call with the tx_lock held */
-static int nbd_send_req(struct nbd_device *nbd, struct request *req)
+static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
 {
+	struct request *req = blk_mq_rq_from_pdu(cmd);
 	int result, flags;
 	struct nbd_request request;
 	unsigned long size = blk_rq_bytes(req);
@@ -298,10 +288,10 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
 	}
-	memcpy(request.handle, &req, sizeof(req));
+	memcpy(request.handle, &req->tag, sizeof(req->tag));
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
-		req, nbdcmd_to_ascii(type),
+		cmd, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, 1, &request, sizeof(request),
 			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
@@ -323,7 +313,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 			if (!rq_iter_last(bvec, iter))
 				flags = MSG_MORE;
 			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
-				req, bvec.bv_len);
+				cmd, bvec.bv_len);
 			result = sock_send_bvec(nbd, &bvec, flags);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk),
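Note: the 8-byte NBD handle used to carry a raw struct request pointer; it now carries the request's blk-mq tag in its first four bytes, and the reply path below never looks at the rest. With a single hardware queue, req->tag and the "unique tag" decoded in nbd_read_stat() coincide, which is what makes the plain memcpy on the send side sufficient. A small decode sketch under that assumption; the helper is illustrative only:

/* Recover the tag from an NBD reply handle; only 4 of the 8 bytes matter. */
static u32 nbd_handle_to_tag(const char *handle)
{
	u32 tag;

	memcpy(&tag, handle, sizeof(tag));
	return tag;
}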
@@ -336,29 +326,6 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 	return 0;
 }
 
-static struct request *nbd_find_request(struct nbd_device *nbd,
-					struct request *xreq)
-{
-	struct request *req, *tmp;
-	int err;
-
-	err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
-	if (unlikely(err))
-		return ERR_PTR(err);
-
-	spin_lock(&nbd->queue_lock);
-	list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
-		if (req != xreq)
-			continue;
-		list_del_init(&req->queuelist);
-		spin_unlock(&nbd->queue_lock);
-		return req;
-	}
-	spin_unlock(&nbd->queue_lock);
-
-	return ERR_PTR(-ENOENT);
-}
-
 static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 {
 	int result;
@@ -370,11 +337,14 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 }
 
 /* NULL returned = something went wrong, inform userspace */
-static struct request *nbd_read_stat(struct nbd_device *nbd)
+static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
 {
 	int result;
 	struct nbd_reply reply;
-	struct request *req;
+	struct nbd_cmd *cmd;
+	struct request *req = NULL;
+	u16 hwq;
+	int tag;
 
 	reply.magic = 0;
 	result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
@@ -390,25 +360,27 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 		return ERR_PTR(-EPROTO);
 	}
 
-	req = nbd_find_request(nbd, *(struct request **)reply.handle);
-	if (IS_ERR(req)) {
-		result = PTR_ERR(req);
-		if (result != -ENOENT)
-			return ERR_PTR(result);
+	memcpy(&tag, reply.handle, sizeof(int));
 
-		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
-			reply.handle);
-		return ERR_PTR(-EBADR);
+	hwq = blk_mq_unique_tag_to_hwq(tag);
+	if (hwq < nbd->tag_set.nr_hw_queues)
+		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
+				       blk_mq_unique_tag_to_tag(tag));
+	if (!req || !blk_mq_request_started(req)) {
+		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
+			tag, req);
+		return ERR_PTR(-ENOENT);
 	}
+	cmd = blk_mq_rq_to_pdu(req);
 
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
 		req->errors++;
-		return req;
+		return cmd;
 	}
 
-	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
+	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
 	if (rq_data_dir(req) != WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
@@ -419,13 +391,13 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
 					result);
 				req->errors++;
-				return req;
+				return cmd;
 			}
 			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
-				req, bvec.bv_len);
+				cmd, bvec.bv_len);
 		}
 	}
-	return req;
+	return cmd;
 }
 
 static ssize_t pid_show(struct device *dev,
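Note: reply handling no longer walks a driver-private list; the tag read off the wire is turned back into the in-flight request through the tag set, and blk_mq_request_started() guards against the server echoing a handle that does not correspond to anything currently issued. Restated as a stand-alone lookup helper (sketch only, names not in the patch):

static struct nbd_cmd *nbd_tag_to_cmd(struct nbd_device *nbd, u32 unique_tag)
{
	u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
	struct request *req = NULL;

	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(unique_tag));
	if (!req || !blk_mq_request_started(req))
		return NULL;	/* stale or bogus handle from the server */
	return blk_mq_rq_to_pdu(req);
}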
@@ -444,7 +416,7 @@ static struct device_attribute pid_attr = {
 
 static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 {
-	struct request *req;
+	struct nbd_cmd *cmd;
 	int ret;
 
 	BUG_ON(nbd->magic != NBD_MAGIC);
@@ -460,13 +432,13 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 	nbd_size_update(nbd, bdev);
 
 	while (1) {
-		req = nbd_read_stat(nbd);
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
+		cmd = nbd_read_stat(nbd);
+		if (IS_ERR(cmd)) {
+			ret = PTR_ERR(cmd);
 			break;
 		}
 
-		nbd_end_request(nbd, req);
+		nbd_end_request(cmd);
 	}
 
 	nbd_size_clear(nbd, bdev);
@@ -475,44 +447,37 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 	return ret;
 }
 
-static void nbd_clear_que(struct nbd_device *nbd)
+static void nbd_clear_req(struct request *req, void *data, bool reserved)
 {
-	struct request *req;
+	struct nbd_cmd *cmd;
 
+	if (!blk_mq_request_started(req))
+		return;
+	cmd = blk_mq_rq_to_pdu(req);
+	req->errors++;
+	nbd_end_request(cmd);
+}
+
+static void nbd_clear_que(struct nbd_device *nbd)
+{
 	BUG_ON(nbd->magic != NBD_MAGIC);
 
 	/*
 	 * Because we have set nbd->sock to NULL under the tx_lock, all
-	 * modifications to the list must have completed by now. For
-	 * the same reason, the active_req must be NULL.
-	 *
-	 * As a consequence, we don't need to take the spin lock while
-	 * purging the list here.
+	 * modifications to the list must have completed by now.
 	 */
 	BUG_ON(nbd->sock);
-	BUG_ON(nbd->active_req);
 
-	while (!list_empty(&nbd->queue_head)) {
-		req = list_entry(nbd->queue_head.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		req->errors++;
-		nbd_end_request(nbd, req);
-	}
-
-	while (!list_empty(&nbd->waiting_queue)) {
-		req = list_entry(nbd->waiting_queue.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		req->errors++;
-		nbd_end_request(nbd, req);
-	}
+	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
 
-static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
+static void nbd_handle_cmd(struct nbd_cmd *cmd)
 {
+	struct request *req = blk_mq_rq_from_pdu(cmd);
+	struct nbd_device *nbd = cmd->nbd;
+
 	if (req->cmd_type != REQ_TYPE_FS)
 		goto error_out;
 
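Note: blk_mq_tagset_busy_iter() visits every request whose tag is currently in use in the tag set and hands each one to the callback together with an opaque data pointer (NULL here); the blk_mq_request_started() check in nbd_clear_req() then skips requests that were allocated but never issued. The data pointer could equally carry the device, e.g. this illustrative variant (not what the patch does):

static void nbd_clear_req_verbose(struct request *req, void *data, bool reserved)
{
	struct nbd_device *nbd = data;

	if (!blk_mq_request_started(req))
		return;
	req->errors++;
	nbd_end_request(blk_mq_rq_to_pdu(req));
	dev_dbg(nbd_to_dev(nbd), "request %p: cleared\n", req);
}

/* call site: blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req_verbose, nbd); */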
@@ -526,6 +491,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 	req->errors = 0;
 
 	mutex_lock(&nbd->tx_lock);
+	nbd->task_send = current;
 	if (unlikely(!nbd->sock)) {
 		mutex_unlock(&nbd->tx_lock);
 		dev_err(disk_to_dev(nbd->disk),
@@ -533,106 +499,34 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 		goto error_out;
 	}
 
-	nbd->active_req = req;
-
-	if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+	if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds))
 		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
 
-	if (nbd_send_req(nbd, req) != 0) {
+	atomic_inc(&nbd->outstanding_cmds);
+	if (nbd_send_cmd(nbd, cmd) != 0) {
 		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
 		req->errors++;
-		nbd_end_request(nbd, req);
-	} else {
-		spin_lock(&nbd->queue_lock);
-		list_add_tail(&req->queuelist, &nbd->queue_head);
-		spin_unlock(&nbd->queue_lock);
+		nbd_end_request(cmd);
 	}
 
-	nbd->active_req = NULL;
+	nbd->task_send = NULL;
 	mutex_unlock(&nbd->tx_lock);
-	wake_up_all(&nbd->active_wq);
 
 	return;
 
 error_out:
 	req->errors++;
-	nbd_end_request(nbd, req);
-}
-
-static int nbd_thread_send(void *data)
-{
-	struct nbd_device *nbd = data;
-	struct request *req;
-
-	nbd->task_send = current;
-
-	set_user_nice(current, MIN_NICE);
-	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
-		/* wait for something to do */
-		wait_event_interruptible(nbd->waiting_wq,
-					 kthread_should_stop() ||
-					 !list_empty(&nbd->waiting_queue));
-
-		/* extract request */
-		if (list_empty(&nbd->waiting_queue))
-			continue;
-
-		spin_lock_irq(&nbd->queue_lock);
-		req = list_entry(nbd->waiting_queue.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		spin_unlock_irq(&nbd->queue_lock);
-
-		/* handle request */
-		nbd_handle_req(nbd, req);
-	}
-
-	nbd->task_send = NULL;
-
-	return 0;
+	nbd_end_request(cmd);
 }
 
-/*
- * We always wait for result of write, for now. It would be nice to make it optional
- * in future
- * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
- * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
- */
-
-static void nbd_request_handler(struct request_queue *q)
-		__releases(q->queue_lock) __acquires(q->queue_lock)
+static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+			const struct blk_mq_queue_data *bd)
 {
-	struct request *req;
-
-	while ((req = blk_fetch_request(q)) != NULL) {
-		struct nbd_device *nbd;
-
-		spin_unlock_irq(q->queue_lock);
-
-		nbd = req->rq_disk->private_data;
-
-		BUG_ON(nbd->magic != NBD_MAGIC);
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
-		dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n",
-			req, req->cmd_type);
-
-		if (unlikely(!nbd->sock)) {
-			dev_err_ratelimited(disk_to_dev(nbd->disk),
-					    "Attempted send on closed socket\n");
-			req->errors++;
-			nbd_end_request(nbd, req);
-			spin_lock_irq(q->queue_lock);
-			continue;
-		}
-
-		spin_lock_irq(&nbd->queue_lock);
-		list_add_tail(&req->queuelist, &nbd->waiting_queue);
-		spin_unlock_irq(&nbd->queue_lock);
-
-		wake_up(&nbd->waiting_wq);
-
-		spin_lock_irq(q->queue_lock);
-	}
+	blk_mq_start_request(bd->rq);
+	nbd_handle_cmd(cmd);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
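Note: .queue_rq replaces both the legacy request_fn and the nbd_thread_send kthread; the blk-mq core calls it once per request, potentially from several CPUs at once, and nbd_handle_cmd() serializes the actual socket I/O on tx_lock. The return value is how the driver talks back to the core in this era; a sketch of that contract, with the constants assumed from the 4.8-era blk-mq API:

static int example_queue_rq(struct blk_mq_hw_ctx *hctx,
			    const struct blk_mq_queue_data *bd)
{
	if (0 /* transient resource shortage */)
		return BLK_MQ_RQ_QUEUE_BUSY;	/* core requeues and retries later */
	if (0 /* request can never be serviced */)
		return BLK_MQ_RQ_QUEUE_ERROR;	/* core fails the request */

	blk_mq_start_request(bd->rq);		/* marks the request started for timeout accounting */
	/* ... issue the command, complete it asynchronously later ... */
	return BLK_MQ_RQ_QUEUE_OK;		/* driver owns the request now */
}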
@@ -700,33 +594,37 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 {
 	switch (cmd) {
 	case NBD_DISCONNECT: {
-		struct request sreq;
+		struct request *sreq;
 
 		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
 		if (!nbd->sock)
 			return -EINVAL;
 
+		sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
+		if (!sreq)
+			return -ENOMEM;
+
 		mutex_unlock(&nbd->tx_lock);
 		fsync_bdev(bdev);
 		mutex_lock(&nbd->tx_lock);
-		blk_rq_init(NULL, &sreq);
-		sreq.cmd_type = REQ_TYPE_DRV_PRIV;
+		sreq->cmd_type = REQ_TYPE_DRV_PRIV;
 
 		/* Check again after getting mutex back. */
-		if (!nbd->sock)
+		if (!nbd->sock) {
+			blk_mq_free_request(sreq);
 			return -EINVAL;
+		}
 
 		nbd->disconnect = true;
 
-		nbd_send_req(nbd, &sreq);
+		nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
+		blk_mq_free_request(sreq);
 		return 0;
 	}
 
 	case NBD_CLEAR_SOCK:
 		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
-		BUG_ON(!list_empty(&nbd->queue_head));
-		BUG_ON(!list_empty(&nbd->waiting_queue));
 		kill_bdev(bdev);
 		return 0;
 
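Note: the disconnect request can no longer live on the stack, because under blk-mq a request needs a tag and a pdu from the queue's tag set. One review-style caveat: in this era blk_mq_alloc_request() reports failure with ERR_PTR() rather than NULL, so a more defensive allocation would look like the sketch below (illustrative only, not what the hunk above does):

static struct request *nbd_alloc_disconnect_req(struct block_device *bdev)
{
	struct request *sreq;

	sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
	if (IS_ERR(sreq))
		return NULL;		/* caller maps NULL to -ENOMEM */
	sreq->cmd_type = REQ_TYPE_DRV_PRIV;
	return sreq;
}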
@@ -772,7 +670,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		return 0;
 
 	case NBD_DO_IT: {
-		struct task_struct *thread;
 		int error;
 
 		if (nbd->task_recv)
@@ -786,18 +683,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 		nbd_parse_flags(nbd, bdev);
 
-		thread = kthread_run(nbd_thread_send, nbd, "%s",
-				     nbd_name(nbd));
-		if (IS_ERR(thread)) {
-			mutex_lock(&nbd->tx_lock);
-			nbd->task_recv = NULL;
-			return PTR_ERR(thread);
-		}
-
 		nbd_dev_dbg_init(nbd);
 		error = nbd_thread_recv(nbd, bdev);
 		nbd_dev_dbg_close(nbd);
-		kthread_stop(thread);
 
 		mutex_lock(&nbd->tx_lock);
 		nbd->task_recv = NULL;
@@ -825,10 +713,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		return 0;
 
 	case NBD_PRINT_DEBUG:
-		dev_info(disk_to_dev(nbd->disk),
-			"next = %p, prev = %p, head = %p\n",
-			nbd->queue_head.next, nbd->queue_head.prev,
-			&nbd->queue_head);
+		/*
+		 * For compatibility only, we no longer keep a list of
+		 * outstanding requests.
+		 */
 		return 0;
 	}
 	return -ENOTTY;
@@ -987,6 +875,23 @@ static void nbd_dbg_close(void)
 
 #endif
 
+static int nbd_init_request(void *data, struct request *rq,
+			    unsigned int hctx_idx, unsigned int request_idx,
+			    unsigned int numa_node)
+{
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	cmd->nbd = data;
+	INIT_LIST_HEAD(&cmd->list);
+	return 0;
+}
+
+static struct blk_mq_ops nbd_mq_ops = {
+	.queue_rq	= nbd_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.init_request	= nbd_init_request,
+};
+
 /*
  * And here should be modules and kernel interface
  *   (Just smiley confuses emacs :-)
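Note: .init_request runs once for every request the core pre-allocates when the tag set is created, with tag_set.driver_data passed in as 'data'; that is how each struct nbd_cmd gets its ->nbd back-pointer before any I/O happens, so the hot path never has to look the device up. .map_queue = blk_mq_map_queue is the stock single-mapping helper that blk-mq drivers of this era supply. A sketch of the same hook with the data flow annotated (restates the patch; the name is illustrative):

static int example_init_request(void *data, struct request *rq,
				unsigned int hctx_idx, unsigned int request_idx,
				unsigned int numa_node)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* pdu behind this pre-allocated request */

	cmd->nbd = data;	/* data == tag_set.driver_data == &nbd_dev[i] */
	return 0;
}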
@@ -1035,16 +940,34 @@ static int __init nbd_init(void)
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
+
+		nbd_dev[i].tag_set.ops = &nbd_mq_ops;
+		nbd_dev[i].tag_set.nr_hw_queues = 1;
+		nbd_dev[i].tag_set.queue_depth = 128;
+		nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
+		nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
+		nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+			BLK_MQ_F_SG_MERGE;
+		nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
+
+		err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
+		if (err) {
+			put_disk(disk);
+			goto out;
+		}
+
 		/*
 		 * The new linux 2.5 block layer implementation requires
 		 * every gendisk to have its very own request_queue struct.
 		 * These structs are big so we dynamically allocate them.
 		 */
-		disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
+		disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
 		if (!disk->queue) {
+			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 			goto out;
 		}
+
 		/*
 		 * Tell the block layer that we are not a rotational device
 		 */
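Note: these few fields are the entire blk-mq configuration for a device: one hardware queue because there is a single socket to feed, a fixed depth of 128 commands in flight, and cmd_size so the core co-allocates a struct nbd_cmd with every request. The same setup restated with the intent of each field spelled out (sketch; the values are the patch's own, the helper name is not):

static void nbd_setup_tag_set(struct nbd_device *nbd)
{
	struct blk_mq_tag_set *set = &nbd->tag_set;

	set->ops = &nbd_mq_ops;
	set->nr_hw_queues = 1;			/* one submission stream: the single socket */
	set->queue_depth = 128;			/* max commands in flight per device */
	set->numa_node = NUMA_NO_NODE;		/* no NUMA preference for the request pool */
	set->cmd_size = sizeof(struct nbd_cmd);	/* per-request driver pdu */
	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	set->driver_data = nbd;			/* handed to .init_request as 'data' */
}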
@@ -1069,16 +992,12 @@ static int __init nbd_init(void)
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
 		nbd_dev[i].magic = NBD_MAGIC;
-		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
-		spin_lock_init(&nbd_dev[i].queue_lock);
 		spin_lock_init(&nbd_dev[i].sock_lock);
-		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
 		mutex_init(&nbd_dev[i].tx_lock);
 		init_timer(&nbd_dev[i].timeout_timer);
 		nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
 		nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
-		init_waitqueue_head(&nbd_dev[i].active_wq);
-		init_waitqueue_head(&nbd_dev[i].waiting_wq);
+		atomic_set(&nbd_dev[i].outstanding_cmds, 0);
 		disk->major = NBD_MAJOR;
 		disk->first_minor = i << part_shift;
 		disk->fops = &nbd_fops;
@@ -1091,6 +1010,7 @@ static int __init nbd_init(void)
 	return 0;
 out:
 	while (i--) {
+		blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 		blk_cleanup_queue(nbd_dev[i].disk->queue);
 		put_disk(nbd_dev[i].disk);
 	}
@@ -1110,6 +1030,7 @@ static void __exit nbd_cleanup(void)
 		if (disk) {
 			del_gendisk(disk);
 			blk_cleanup_queue(disk->queue);
+			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 		}
 	}