path: root/drivers/block
author		Sage Weil <sage@inktank.com>	2012-06-15 15:32:04 -0400
committer	Sage Weil <sage@inktank.com>	2012-06-15 15:32:04 -0400
commit		9a64e8e0ace51b309fdcff4b4754b3649250382a (patch)
tree		1f0d75c196c5ab0408c55ed6cf3a152f1f921e15 /drivers/block
parent		f3dea7edd3d449fe7a6d402c1ce56a294b985261 (diff)
parent		f8f5701bdaf9134b1f90e5044a82c66324d2073f (diff)

Merge tag 'v3.5-rc1'

Linux 3.5-rc1

Conflicts:
	net/ceph/messenger.c
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/DAC960.c               |   23
-rw-r--r--  drivers/block/Kconfig                |    2
-rw-r--r--  drivers/block/brd.c                  |   20
-rw-r--r--  drivers/block/cciss_scsi.c           |    3
-rw-r--r--  drivers/block/drbd/drbd_actlog.c     |  104
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c     |  196
-rw-r--r--  drivers/block/drbd/drbd_int.h        |   90
-rw-r--r--  drivers/block/drbd/drbd_main.c       |  357
-rw-r--r--  drivers/block/drbd/drbd_nl.c         |   56
-rw-r--r--  drivers/block/drbd/drbd_proc.c       |    2
-rw-r--r--  drivers/block/drbd/drbd_receiver.c   |  101
-rw-r--r--  drivers/block/drbd/drbd_req.c        |  132
-rw-r--r--  drivers/block/drbd/drbd_req.h        |   19
-rw-r--r--  drivers/block/drbd/drbd_worker.c     |   31
-rw-r--r--  drivers/block/floppy.c               |  198
-rw-r--r--  drivers/block/hd.c                   |    1
-rw-r--r--  drivers/block/loop.c                 |   16
-rw-r--r--  drivers/block/mtip32xx/Kconfig       |    2
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c    | 1076
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h    |   62
-rw-r--r--  drivers/block/nbd.c                  |  296
-rw-r--r--  drivers/block/nvme.c                 |    1
-rw-r--r--  drivers/block/pktcdvd.c              |    8
-rw-r--r--  drivers/block/sunvdc.c               |    5
-rw-r--r--  drivers/block/ub.c                   |   39
-rw-r--r--  drivers/block/viodasd.c              |  809
-rw-r--r--  drivers/block/virtio_blk.c           |   63
-rw-r--r--  drivers/block/xd.c                   |    1
-rw-r--r--  drivers/block/xen-blkback/blkback.c  |   50
-rw-r--r--  drivers/block/xen-blkback/common.h   |    6
-rw-r--r--  drivers/block/xen-blkback/xenbus.c   |   89
-rw-r--r--  drivers/block/xen-blkfront.c         |   88
32 files changed, 2026 insertions, 1920 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 8db9089127c5..9a13e889837e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6580,24 +6580,21 @@ static const struct file_operations dac960_user_command_proc_fops = {
 
 static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
 {
-	struct proc_dir_entry *StatusProcEntry;
 	struct proc_dir_entry *ControllerProcEntry;
-	struct proc_dir_entry *UserCommandProcEntry;
 
 	if (DAC960_ProcDirectoryEntry == NULL) {
 		DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
-		StatusProcEntry = proc_create("status", 0,
-					DAC960_ProcDirectoryEntry,
-					&dac960_proc_fops);
+		proc_create("status", 0, DAC960_ProcDirectoryEntry,
+			    &dac960_proc_fops);
 	}
 
 	sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
 	ControllerProcEntry = proc_mkdir(Controller->ControllerName,
 					 DAC960_ProcDirectoryEntry);
 	proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
 	proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
-	UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+	proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
 	Controller->ControllerProcEntry = ControllerProcEntry;
 }
 
 
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 4e4c8a4a5fd3..a796407123c7 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -354,7 +354,7 @@ config BLK_DEV_SX8
 	  Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
 
 config BLK_DEV_UB
-	tristate "Low Performance USB Block driver"
+	tristate "Low Performance USB Block driver (deprecated)"
 	depends on USB
 	help
 	  This driver supports certain USB attached storage devices
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ec246437f5a4..531ceb31d0ff 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 	page = brd_lookup_page(brd, sector);
 	BUG_ON(!page);
 
-	dst = kmap_atomic(page, KM_USER1);
+	dst = kmap_atomic(page);
 	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst, KM_USER1);
+	kunmap_atomic(dst);
 
 	if (copy < n) {
 		src += copy;
@@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 		page = brd_lookup_page(brd, sector);
 		BUG_ON(!page);
 
-		dst = kmap_atomic(page, KM_USER1);
+		dst = kmap_atomic(page);
 		memcpy(dst, src, copy);
-		kunmap_atomic(dst, KM_USER1);
+		kunmap_atomic(dst);
 	}
 }
 
@@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
 	page = brd_lookup_page(brd, sector);
 	if (page) {
-		src = kmap_atomic(page, KM_USER1);
+		src = kmap_atomic(page);
 		memcpy(dst, src + offset, copy);
-		kunmap_atomic(src, KM_USER1);
+		kunmap_atomic(src);
 	} else
 		memset(dst, 0, copy);
 
@@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 		copy = n - copy;
 		page = brd_lookup_page(brd, sector);
 		if (page) {
-			src = kmap_atomic(page, KM_USER1);
+			src = kmap_atomic(page);
 			memcpy(dst, src, copy);
-			kunmap_atomic(src, KM_USER1);
+			kunmap_atomic(src);
 		} else
 			memset(dst, 0, copy);
 	}
@@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		goto out;
 	}
 
-	mem = kmap_atomic(page, KM_USER0);
+	mem = kmap_atomic(page);
 	if (rw == READ) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
@@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		flush_dcache_page(page);
 		copy_to_brd(brd, mem + off, sector, len);
 	}
-	kunmap_atomic(mem, KM_USER0);
+	kunmap_atomic(mem);
 
 out:
 	return err;
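The brd.c hunks above follow the kernel-wide kmap_atomic() interface change from this era: the explicit KM_USER*/KM_IRQ* slot argument is gone and the mapping slot is now chosen implicitly per CPU. A minimal sketch of the new calling convention; the helper name and buffer are illustrative, not part of brd.c:

#include <linux/highmem.h>
#include <linux/string.h>

/* Illustrative helper (not from brd.c): copy a buffer into a
 * possibly-highmem destination page using the one-argument API. */
static void copy_into_page(struct page *dst_page, const void *src, size_t len)
{
	void *dst;

	/* old API: dst = kmap_atomic(dst_page, KM_USER0); */
	dst = kmap_atomic(dst_page);	/* new API: slot picked implicitly */
	memcpy(dst, src, len);
	/* old API: kunmap_atomic(dst, KM_USER0); */
	kunmap_atomic(dst);
}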
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e820b68d2f6c..acda773b3720 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -866,6 +866,7 @@ cciss_scsi_detect(ctlr_info_t *h)
 	sh->can_queue = cciss_tape_cmds;
 	sh->sg_tablesize = h->maxsgentries;
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
+	sh->max_sectors = h->cciss_max_sectors;
 
 	((struct cciss_scsi_adapter_data_t *)
 		h->scsi_ctlr)->scsi_host = sh;
@@ -1410,7 +1411,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
 	/* track how many SG entries we are using */
 	if (request_nsgs > h->maxSG)
 		h->maxSG = request_nsgs;
-	c->Header.SGTotal = (__u8) request_nsgs + chained;
+	c->Header.SGTotal = (u16) request_nsgs + chained;
 	if (request_nsgs > h->max_cmd_sgentries)
 		c->Header.SGList = h->max_cmd_sgentries;
 	else
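The SGTotal change above widens the cast from __u8 to u16: once a controller supports more than 255 scatter-gather entries, an 8-bit cast silently truncates the count. A tiny standalone illustration of that truncation (the value 300 is made up, not taken from the driver):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int request_nsgs = 300;	/* hypothetical SG entry count */

	/* 300 modulo 256 == 44: the 8-bit cast loses information */
	printf("(u8)  -> %u\n", (unsigned int)(uint8_t)request_nsgs);
	/* the 16-bit cast preserves the count */
	printf("(u16) -> %u\n", (unsigned int)(uint16_t)request_nsgs);
	return 0;
}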
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index cf0e63dd97da..e54e31b02b88 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -65,39 +65,80 @@ struct drbd_atodb_wait {
 
 int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
 
+void *drbd_md_get_buffer(struct drbd_conf *mdev)
+{
+	int r;
+
+	wait_event(mdev->misc_wait,
+		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
+		   mdev->state.disk <= D_FAILED);
+
+	return r ? NULL : page_address(mdev->md_io_page);
+}
+
+void drbd_md_put_buffer(struct drbd_conf *mdev)
+{
+	if (atomic_dec_and_test(&mdev->md_io_in_use))
+		wake_up(&mdev->misc_wait);
+}
+
+static bool md_io_allowed(struct drbd_conf *mdev)
+{
+	enum drbd_disk_state ds = mdev->state.disk;
+	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
+}
+
+void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+				     unsigned int *done)
+{
+	long dt = bdev->dc.disk_timeout * HZ / 10;
+	if (dt == 0)
+		dt = MAX_SCHEDULE_TIMEOUT;
+
+	dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
+	if (dt == 0)
+		dev_err(DEV, "meta-data IO operation timed out\n");
+}
+
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 				 struct drbd_backing_dev *bdev,
 				 struct page *page, sector_t sector,
 				 int rw, int size)
 {
 	struct bio *bio;
-	struct drbd_md_io md_io;
 	int ok;
 
-	md_io.mdev = mdev;
-	init_completion(&md_io.event);
-	md_io.error = 0;
+	mdev->md_io.done = 0;
+	mdev->md_io.error = -ENODEV;
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
 		rw |= REQ_FUA | REQ_FLUSH;
 	rw |= REQ_SYNC;
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
 	bio->bi_sector = sector;
 	ok = (bio_add_page(bio, page, size, 0) == size);
 	if (!ok)
 		goto out;
-	bio->bi_private = &md_io;
+	bio->bi_private = &mdev->md_io;
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
+		ok = 0;
+		goto out;
+	}
+
+	bio_get(bio); /* one bio_put() is in the completion handler */
+	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
 	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
-	wait_for_completion(&md_io.event);
-	ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
+	wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
+	ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;
 
  out:
 	bio_put(bio);
@@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 	int offset = 0;
 	struct page *iop = mdev->md_io_page;
 
-	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
@@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 		return 1;
 	}
 
-	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
+	if (!buffer) {
+		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
+		complete(&((struct update_al_work *)w)->event);
+		put_ldev(mdev);
+		return 1;
+	}
 
 	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
@@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
 	mdev->al_tr_number++;
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
@@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		return 0;
 
 	/* Find the valid transaction in the log */
 	for (i = 0; i <= mx; i++) {
@@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		if (rv == 0)
 			continue;
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 		cnr = be32_to_cpu(buffer->tr_number);
@@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	if (!found_valid) {
 		dev_warn(DEV, "No usable activity log found.\n");
-		mutex_unlock(&mdev->md_io_mutex);
+		drbd_md_put_buffer(mdev);
 		return 1;
 	}
 
@@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
 		ERR_IF(rv == 0) goto cancel;
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -534,7 +581,7 @@ cancel:
 	mdev->al_tr_pos = 0;
 
 	/* ok, we are done with it */
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 
 	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
 	     transactions, active_extents);
@@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 		else
 			ext->rs_failed += count;
 		if (ext->rs_left < ext->rs_failed) {
-			dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
-			    "rs_failed=%d count=%d\n",
+			dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
+			    "rs_failed=%d count=%d cstate=%s\n",
 			     (unsigned long long)sector,
 			     ext->lce.lc_number, ext->rs_left,
-			     ext->rs_failed, count);
-			dump_stack();
+			     ext->rs_failed, count,
+			     drbd_conn_str(mdev->state.conn));
 
-			lc_put(mdev->resync, &ext->lce);
-			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-			return;
+			/* We don't expect to be able to clear more bits
+			 * than have been set when we originally counted
+			 * the set bits to cache that value in ext->rs_left.
+			 * Whatever the reason (disconnect during resync,
+			 * delayed local completion of an application write),
+			 * try to fix it up by recounting here. */
+			ext->rs_left = drbd_bm_e_weight(mdev, enr);
 		}
 	} else {
 		/* Normally this element should be in the cache,
@@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
 		put_ldev(mdev);
 	}
 	spin_unlock_irq(&mdev->al_lock);
+	wake_up(&mdev->al_wait);
 
 	return 0;
 }
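The drbd_md_get_buffer()/drbd_md_put_buffer() pair introduced above replaces the old md_io_mutex with an atomic_cmpxchg()-guarded single-user buffer whose waiters can also bail out when the backing disk fails, so a dead device can no longer wedge meta-data IO forever. A reduced standalone sketch of that pattern; the names and the give_up() condition are illustrative, not DRBD's:

#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/types.h>

static atomic_t buf_in_use = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(buf_wait);
static char buf[4096];

/* e.g. "the backing disk has failed"; trivially false in this sketch */
static bool give_up(void)
{
	return false;
}

/* Return the buffer once we are its exclusive user, or NULL if we
 * stopped waiting because give_up() became true. */
static void *get_buffer(void)
{
	int r;

	wait_event(buf_wait,
		   (r = atomic_cmpxchg(&buf_in_use, 0, 1)) == 0 || give_up());
	return r ? NULL : buf;
}

static void put_buffer(void)
{
	if (atomic_dec_and_test(&buf_in_use))
		wake_up(&buf_wait);
}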
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 912f585a760f..b5c5ff53cb57 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
 static void bm_store_page_idx(struct page *page, unsigned long idx)
 {
 	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
-	page_private(page) |= idx;
+	set_page_private(page, idx);
 }
 
 static unsigned long bm_page_to_idx(struct page *page)
@@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
 	return page_nr;
 }
 
-static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
 {
 	struct page *page = b->bm_pages[idx];
-	return (unsigned long *) kmap_atomic(page, km);
+	return (unsigned long *) kmap_atomic(page);
 }
 
 static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
 {
-	return __bm_map_pidx(b, idx, KM_IRQ1);
+	return __bm_map_pidx(b, idx);
 }
 
-static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+static void __bm_unmap(unsigned long *p_addr)
 {
-	kunmap_atomic(p_addr, km);
+	kunmap_atomic(p_addr);
 };
 
 static void bm_unmap(unsigned long *p_addr)
 {
-	return __bm_unmap(p_addr, KM_IRQ1);
+	return __bm_unmap(p_addr);
 }
 
 /* long word offset of _bitmap_ sector */
@@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
 
 	/* all but last page */
 	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
-		p_addr = __bm_map_pidx(b, idx, KM_USER0);
+		p_addr = __bm_map_pidx(b, idx);
 		for (i = 0; i < LWPP; i++)
 			bits += hweight_long(p_addr[i]);
-		__bm_unmap(p_addr, KM_USER0);
+		__bm_unmap(p_addr);
 		cond_resched();
 	}
 	/* last (or only) page */
 	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
-	p_addr = __bm_map_pidx(b, idx, KM_USER0);
+	p_addr = __bm_map_pidx(b, idx);
 	for (i = 0; i < last_word; i++)
 		bits += hweight_long(p_addr[i]);
 	p_addr[last_word] &= cpu_to_lel(mask);
@@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
 	/* 32bit arch, may have an unused padding long */
 	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
 		p_addr[last_word+1] = 0;
-	__bm_unmap(p_addr, KM_USER0);
+	__bm_unmap(p_addr);
 	return bits;
 }
 
@@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
 struct bm_aio_ctx {
 	struct drbd_conf *mdev;
 	atomic_t in_flight;
-	struct completion done;
+	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
 	int error;
+	struct kref kref;
 };
 
+static void bm_aio_ctx_destroy(struct kref *kref)
+{
+	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+
+	put_ldev(ctx->mdev);
+	kfree(ctx);
+}
+
 /* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
@@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error)
 
 	bm_page_unlock_io(mdev, idx);
 
-	/* FIXME give back to page pool */
 	if (ctx->flags & BM_AIO_COPY_PAGES)
-		put_page(bio->bi_io_vec[0].bv_page);
+		mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
 
 	bio_put(bio);
 
-	if (atomic_dec_and_test(&ctx->in_flight))
-		complete(&ctx->done);
+	if (atomic_dec_and_test(&ctx->in_flight)) {
+		ctx->done = 1;
+		wake_up(&mdev->misc_wait);
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	}
 }
 
 static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
 {
-	/* we are process context. we always get a bio */
-	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 	struct drbd_conf *mdev = ctx->mdev;
 	struct drbd_bitmap *b = mdev->bitmap;
 	struct page *page;
@@ -966,21 +976,21 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		/* FIXME alloc_page is good enough for now, but actually needs
-		 * to use pre-allocated page pool */
 		void *src, *dest;
-		page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
-		dest = kmap_atomic(page, KM_USER0);
-		src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
+		dest = kmap_atomic(page);
+		src = kmap_atomic(b->bm_pages[page_nr]);
 		memcpy(dest, src, PAGE_SIZE);
-		kunmap_atomic(src, KM_USER1);
-		kunmap_atomic(dest, KM_USER0);
+		kunmap_atomic(src);
+		kunmap_atomic(dest);
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
 
 	bio->bi_bdev = mdev->ldev->md_bdev;
 	bio->bi_sector = on_disk_sector;
+	/* bio_add_page of a single page to an empty bio will always succeed,
+	 * according to api. Do we want to assert that? */
 	bio_add_page(bio, page, len, 0);
 	bio->bi_private = ctx;
 	bio->bi_end_io = bm_async_io_complete;
@@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 /*
  * bm_rw: read/write the whole bitmap from/to its on disk location.
  */
-static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
+static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
-		.mdev = mdev,
-		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
-		.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
-	};
+	struct bm_aio_ctx *ctx;
 	struct drbd_bitmap *b = mdev->bitmap;
 	int num_pages, i, count = 0;
 	unsigned long now;
@@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
 	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
 	 * as we submit copies of pages anyways.
 	 */
-	if (!ctx.flags)
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = 0,
+		.flags = flags,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
+	};
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+		kfree(ctx);
+		return -ENODEV;
+	}
+
+	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
 	num_pages = b->bm_number_of_pages;
@@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
 				continue;
 			}
 		}
-		atomic_inc(&ctx.in_flight);
-		bm_page_io_async(&ctx, i, rw);
+		atomic_inc(&ctx->in_flight);
+		bm_page_io_async(ctx, i, rw);
 		++count;
 		cond_resched();
 	}
 
 	/*
-	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
-	 * will not complete() early, and decrement / test it here. If there
+	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete
+	 * will not set ctx->done early, and decrement / test it here. If there
 	 * are still some bios in flight, we need to wait for them here.
+	 * If all IO is done already (or nothing had been submitted), there is
+	 * no need to wait. Still, we need to put the kref associated with the
+	 * "in_flight reached zero, all done" event.
 	 */
-	if (!atomic_dec_and_test(&ctx.in_flight))
-		wait_for_completion(&ctx.done);
+	if (!atomic_dec_and_test(&ctx->in_flight))
+		wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+	else
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+
 	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
 			rw == WRITE ? "WRITE" : "READ",
 			count, jiffies - now);
 
-	if (ctx.error) {
+	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
 		drbd_chk_io_error(mdev, 1, true);
-		err = -EIO; /* ctx.error ? */
+		err = -EIO; /* ctx->error ? */
 	}
 
+	if (atomic_read(&ctx->in_flight))
+		err = -EIO; /* Disk failed during IO... */
+
 	now = jiffies;
 	if (rw == WRITE) {
 		drbd_md_flush(mdev);
@@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
 		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
 }
 
@@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
  */
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
 {
-	return bm_rw(mdev, READ, 0);
+	return bm_rw(mdev, READ, 0, 0);
 }
 
 /**
@@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
  */
 int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
 {
-	return bm_rw(mdev, WRITE, 0);
+	return bm_rw(mdev, WRITE, 0, 0);
 }
 
 /**
@@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
  */
 int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
 {
-	return bm_rw(mdev, WRITE, upper_idx);
+	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx);
+}
+
+/**
+ * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
+ * @mdev:	DRBD device.
+ *
+ * Will only write pages that have changed since last IO.
+ * In contrast to drbd_bm_write(), this will copy the bitmap pages
+ * to temporary writeout pages. It is intended to trigger a full write-out
+ * while still allowing the bitmap to change, for example if a resync or online
+ * verify is aborted due to a failed peer disk, while local IO continues, or
+ * pending resync acks are still being processed.
+ */
+int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
 }
 
 
@@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
  */
 int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
+	struct bm_aio_ctx *ctx;
+	int err;
+
+	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+		return 0;
+	}
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.done = 0,
 		.flags = BM_AIO_COPY_PAGES,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
-		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
-		return 0;
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {	/* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+		kfree(ctx);
+		return -ENODEV;
 	}
 
-	bm_page_io_async(&ctx, idx, WRITE_SYNC);
-	wait_for_completion(&ctx.done);
+	bm_page_io_async(ctx, idx, WRITE_SYNC);
+	wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
 
-	if (ctx.error)
+	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, true);
 	/* that should force detach, so the in memory bitmap will be
 	 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
-	return ctx.error;
+	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	return err;
 }
 
 /* NOTE
@@ -1163,7 +1231,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
  * this returns a bit number, NOT a sector!
  */
 static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
-	const int find_zero_bit, const enum km_type km)
+	const int find_zero_bit)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *p_addr;
@@ -1178,7 +1246,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
 	while (bm_fo < b->bm_bits) {
 		/* bit offset of the first bit in the page */
 		bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
-		p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+		p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
 
 		if (find_zero_bit)
 			i = find_next_zero_bit_le(p_addr,
@@ -1187,7 +1255,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
 			i = find_next_bit_le(p_addr,
 				PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
 
-		__bm_unmap(p_addr, km);
+		__bm_unmap(p_addr);
 		if (i < PAGE_SIZE*8) {
 			bm_fo = bit_offset + i;
 			if (bm_fo >= b->bm_bits)
@@ -1215,7 +1283,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
 	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 
-	i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+	i = __bm_find_next(mdev, bm_fo, find_zero_bit);
 
 	spin_unlock_irq(&b->bm_lock);
 	return i;
@@ -1239,13 +1307,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo
 unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
 {
 	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
-	return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+	return __bm_find_next(mdev, bm_fo, 0);
 }
 
 unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
 {
 	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
-	return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+	return __bm_find_next(mdev, bm_fo, 1);
 }
 
 /* returns number of bits actually changed.
@@ -1273,14 +1341,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
 		if (page_nr != last_page_nr) {
 			if (p_addr)
-				__bm_unmap(p_addr, KM_IRQ1);
+				__bm_unmap(p_addr);
 			if (c < 0)
 				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
 			else if (c > 0)
 				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
 			changed_total += c;
 			c = 0;
-			p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+			p_addr = __bm_map_pidx(b, page_nr);
 			last_page_nr = page_nr;
 		}
 		if (val)
@@ -1289,7 +1357,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 			c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
 	}
 	if (p_addr)
-		__bm_unmap(p_addr, KM_IRQ1);
+		__bm_unmap(p_addr);
 	if (c < 0)
 		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
 	else if (c > 0)
@@ -1342,13 +1410,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 {
 	int i;
 	int bits;
-	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
 	for (i = first_word; i < last_word; i++) {
 		bits = hweight_long(paddr[i]);
 		paddr[i] = ~0UL;
 		b->bm_set += BITS_PER_LONG - bits;
 	}
-	kunmap_atomic(paddr, KM_IRQ1);
+	kunmap_atomic(paddr);
 }
 
 /* Same thing as drbd_bm_set_bits,
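The bitmap I/O rework above moves struct bm_aio_ctx off the stack onto the heap and reference-counts it with a kref initialized to 2: one reference belongs to the submitter, the other is dropped by the last completion. That way a waiter that times out on a failed disk can return while completions are still in flight. A reduced sketch of that ownership scheme under the same assumptions, with field names following the diff and error handling trimmed:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/atomic.h>

struct aio_ctx {
	atomic_t in_flight;	/* submissions not yet completed */
	unsigned int done;	/* set once the last completion ran */
	struct kref kref;	/* one ref for submitter, one for completions */
};

static void aio_ctx_destroy(struct kref *kref)
{
	kfree(container_of(kref, struct aio_ctx, kref));
}

/* called from each completion handler */
static void aio_complete_one(struct aio_ctx *ctx)
{
	if (atomic_dec_and_test(&ctx->in_flight)) {
		ctx->done = 1;
		kref_put(&ctx->kref, aio_ctx_destroy);	/* drop completion ref */
	}
}

static struct aio_ctx *aio_ctx_alloc(void)
{
	struct aio_ctx *ctx = kmalloc(sizeof(*ctx), GFP_NOIO);

	if (!ctx)
		return NULL;
	atomic_set(&ctx->in_flight, 1);	/* "submitter still submitting" */
	ctx->done = 0;
	kref_init(&ctx->kref);		/* submitter's reference */
	kref_get(&ctx->kref);		/* reference released when in_flight hits 0 */
	return ctx;
}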
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8d680562ba73..02f013a073a7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -712,7 +712,6 @@ struct drbd_request {
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
 	unsigned long rq_state; /* see comments above _req_mod() */
-	int seq_num;
 	unsigned long start_time;
 };
 
@@ -851,6 +850,7 @@ enum {
 	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
+	STATE_SENT,		/* Do not change state/UUIDs while this is set */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
@@ -862,31 +862,30 @@ enum bm_flag {
 	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
 
 	/* currently locked for bulk operation */
-	BM_LOCKED_MASK = 0x7,
+	BM_LOCKED_MASK = 0xf,
 
 	/* in detail, that is: */
 	BM_DONT_CLEAR = 0x1,
 	BM_DONT_SET = 0x2,
 	BM_DONT_TEST = 0x4,
 
+	/* so we can mark it locked for bulk operation,
+	 * and still allow all non-bulk operations */
+	BM_IS_LOCKED = 0x8,
+
 	/* (test bit, count bit) allowed (common case) */
-	BM_LOCKED_TEST_ALLOWED = 0x3,
+	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,
 
 	/* testing bits, as well as setting new bits allowed, but clearing bits
 	 * would be unexpected. Used during bitmap receive. Setting new bits
 	 * requires sending of "out-of-sync" information, though. */
-	BM_LOCKED_SET_ALLOWED = 0x1,
+	BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED,
 
-	/* clear is not expected while bitmap is locked for bulk operation */
+	/* for drbd_bm_write_copy_pages, everything is allowed,
+	 * only concurrent bulk operations are locked out. */
+	BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,
 };
 
-
-/* TODO sort members for performance
- * MAYBE group them further */
-
-/* THINK maybe we actually want to use the default "event/%s" worker threads
- * or similar in linux 2.6, which uses per cpu data and threads.
- */
 struct drbd_work_queue {
 	struct list_head q;
 	struct semaphore s; /* producers up it, worker down()s it */
@@ -938,8 +937,7 @@ struct drbd_backing_dev {
 };
 
 struct drbd_md_io {
-	struct drbd_conf *mdev;
-	struct completion event;
+	unsigned int done;
 	int error;
 };
 
@@ -1022,6 +1020,7 @@ struct drbd_conf {
 	struct drbd_tl_epoch *newest_tle;
 	struct drbd_tl_epoch *oldest_tle;
 	struct list_head out_of_sequence_requests;
+	struct list_head barrier_acked_requests;
 	struct hlist_head *tl_hash;
 	unsigned int tl_hash_s;
 
@@ -1056,6 +1055,8 @@ struct drbd_conf {
 	struct crypto_hash *csums_tfm;
 	struct crypto_hash *verify_tfm;
 
+	unsigned long last_reattach_jif;
+	unsigned long last_reconnect_jif;
 	struct drbd_thread receiver;
 	struct drbd_thread worker;
 	struct drbd_thread asender;
@@ -1094,7 +1095,8 @@ struct drbd_conf {
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
 	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
-	struct mutex md_io_mutex;	/* protects the md_io_buffer */
+	struct drbd_md_io md_io;
+	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
 	struct lru_cache *act_log;	/* activity log */
@@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev);
 extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
 extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
 extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
-extern int _drbd_send_state(struct drbd_conf *mdev);
-extern int drbd_send_state(struct drbd_conf *mdev);
+extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s);
+extern int drbd_send_current_state(struct drbd_conf *mdev);
 extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 			  enum drbd_packets cmd, struct p_header80 *h,
 			  size_t size, unsigned msg_flags);
@@ -1461,6 +1463,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
 extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
 extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
 extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
 extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
 		unsigned long al_enr);
 extern size_t drbd_bm_words(struct drbd_conf *mdev);
@@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
 extern mempool_t *drbd_request_mempool;
 extern mempool_t *drbd_ee_mempool;
 
-extern struct page *drbd_pp_pool; /* drbd's page pool */
+/* drbd's page pool, used to buffer data received from the peer,
+ * or data requested by the peer.
+ *
+ * This does not have an emergency reserve.
+ *
+ * When allocating from this pool, it first takes pages from the pool.
+ * Only if the pool is depleted will try to allocate from the system.
+ *
+ * The assumption is that pages taken from this pool will be processed,
+ * and given back, "quickly", and then can be recycled, so we can avoid
+ * frequent calls to alloc_page(), and still will be able to make progress even
+ * under memory pressure.
+ */
+extern struct page *drbd_pp_pool;
 extern spinlock_t   drbd_pp_lock;
 extern int	    drbd_pp_vacant;
 extern wait_queue_head_t drbd_pp_wait;
 
+/* We also need a standard (emergency-reserve backed) page pool
+ * for meta data IO (activity log, bitmap).
+ * We can keep it global, as long as it is used as "N pages at a time".
+ * 128 should be plenty, currently we probably can get away with as few as 1.
+ */
+#define DRBD_MIN_POOL_PAGES	128
+extern mempool_t *drbd_md_io_page_pool;
+
+/* We also need to make sure we get a bio
+ * when we need it for housekeeping purposes */
+extern struct bio_set *drbd_md_io_bio_set;
+/* to allocate from that set */
+extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
+
 extern rwlock_t global_state_lock;
 
 extern struct drbd_conf *drbd_new_device(unsigned int minor);
@@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
 extern int drbd_resync_finished(struct drbd_conf *mdev);
 /* maybe rather drbd_main.c ? */
+extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
+extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
+extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+					    unsigned int *done);
 extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
@@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page)
 #define page_chain_for_each_safe(page, n) \
 	for (; page && ({ n = page_chain_next(page); 1; }); page = n)
 
-static inline int drbd_bio_has_active_page(struct bio *bio)
-{
-	struct bio_vec *bvec;
-	int i;
-
-	__bio_for_each_segment(bvec, bio, i, 0) {
-		if (page_count(bvec->bv_page) > 1)
-			return 1;
-	}
-
-	return 0;
-}
-
 static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
 {
 	struct page *page = e->pages;
@@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
 	return 0;
 }
 
-
 static inline void drbd_state_lock(struct drbd_conf *mdev)
 {
 	wait_event(mdev->misc_wait,
@@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
 	 * Note: currently we don't support such large bitmaps on 32bit
 	 * arch anyways, but no harm done to be prepared for it here.
 	 */
-	unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
+	unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10;
 	unsigned long left = *bits_left >> shift;
 	unsigned long total = 1UL + (mdev->rs_total >> shift);
 	unsigned long tmp = 1000UL - left * 1000UL/total;
@@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 	case D_OUTDATED:
 	case D_CONSISTENT:
 	case D_UP_TO_DATE:
+	case D_FAILED:
 		/* disk state is stable as well. */
 		break;
 
 	/* no new io accepted during tansitional states */
 	case D_ATTACHING:
-	case D_FAILED:
 	case D_NEGOTIATING:
 	case D_UNKNOWN:
 	case D_MASK:
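drbd_int.h now declares an emergency-reserve page pool (drbd_md_io_page_pool) and a dedicated bio_set (drbd_md_io_bio_set) for meta-data IO. The diff does not show how they are created, but on a 3.x kernel such reserves are typically set up once at module init, roughly like this sketch; the init function name and error handling are assumptions, not DRBD's actual code:

#include <linux/mempool.h>
#include <linux/bio.h>

#define DRBD_MIN_POOL_PAGES 128

mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

static int __init md_io_pools_init(void)
{
	/* emergency reserve of whole pages (order 0) for meta-data IO */
	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
	if (!drbd_md_io_page_pool)
		return -ENOMEM;

	/* small bioset so bio_alloc_drbd() keeps working under memory pressure */
	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
	if (!drbd_md_io_bio_set) {
		mempool_destroy(drbd_md_io_page_pool);
		return -ENOMEM;
	}
	return 0;
}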
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 211fc44f84be..920ede2829d6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -139,6 +139,8 @@ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
139struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ 139struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
140mempool_t *drbd_request_mempool; 140mempool_t *drbd_request_mempool;
141mempool_t *drbd_ee_mempool; 141mempool_t *drbd_ee_mempool;
142mempool_t *drbd_md_io_page_pool;
143struct bio_set *drbd_md_io_bio_set;
142 144
143/* I do not use a standard mempool, because: 145/* I do not use a standard mempool, because:
144 1) I want to hand out the pre-allocated objects first. 146 1) I want to hand out the pre-allocated objects first.
@@ -159,7 +161,24 @@ static const struct block_device_operations drbd_ops = {
159 .release = drbd_release, 161 .release = drbd_release,
160}; 162};
161 163
162#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) 164static void bio_destructor_drbd(struct bio *bio)
165{
166 bio_free(bio, drbd_md_io_bio_set);
167}
168
169struct bio *bio_alloc_drbd(gfp_t gfp_mask)
170{
171 struct bio *bio;
172
173 if (!drbd_md_io_bio_set)
174 return bio_alloc(gfp_mask, 1);
175
176 bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
177 if (!bio)
178 return NULL;
179 bio->bi_destructor = bio_destructor_drbd;
180 return bio;
181}
163 182
164#ifdef __CHECKER__ 183#ifdef __CHECKER__
165/* When checking with sparse, and this is an inline function, sparse will 184/* When checking with sparse, and this is an inline function, sparse will
@@ -208,6 +227,7 @@ static int tl_init(struct drbd_conf *mdev)
208 mdev->oldest_tle = b; 227 mdev->oldest_tle = b;
209 mdev->newest_tle = b; 228 mdev->newest_tle = b;
210 INIT_LIST_HEAD(&mdev->out_of_sequence_requests); 229 INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
230 INIT_LIST_HEAD(&mdev->barrier_acked_requests);
211 231
212 mdev->tl_hash = NULL; 232 mdev->tl_hash = NULL;
213 mdev->tl_hash_s = 0; 233 mdev->tl_hash_s = 0;
@@ -246,9 +266,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
246 new->n_writes = 0; 266 new->n_writes = 0;
247 267
248 newest_before = mdev->newest_tle; 268 newest_before = mdev->newest_tle;
249 /* never send a barrier number == 0, because that is special-cased 269 new->br_number = newest_before->br_number+1;
250 * when using TCQ for our write ordering code */
251 new->br_number = (newest_before->br_number+1) ?: 1;
252 if (mdev->newest_tle != new) { 270 if (mdev->newest_tle != new) {
253 mdev->newest_tle->next = new; 271 mdev->newest_tle->next = new;
254 mdev->newest_tle = new; 272 mdev->newest_tle = new;
@@ -311,7 +329,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
311 These have been list_move'd to the out_of_sequence_requests list in 329 These have been list_move'd to the out_of_sequence_requests list in
312 _req_mod(, barrier_acked) above. 330 _req_mod(, barrier_acked) above.
313 */ 331 */
314 list_del_init(&b->requests); 332 list_splice_init(&b->requests, &mdev->barrier_acked_requests);
315 333
316 nob = b->next; 334 nob = b->next;
317 if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { 335 if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
@@ -411,6 +429,23 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
411 b = tmp; 429 b = tmp;
412 list_splice(&carry_reads, &b->requests); 430 list_splice(&carry_reads, &b->requests);
413 } 431 }
432
433 /* Actions operating on the disk state, also want to work on
434 requests that got barrier acked. */
435 switch (what) {
436 case fail_frozen_disk_io:
437 case restart_frozen_disk_io:
438 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
439 req = list_entry(le, struct drbd_request, tl_requests);
440 _req_mod(req, what);
441 }
442
443 case connection_lost_while_pending:
444 case resend:
445 break;
446 default:
447 dev_err(DEV, "what = %d in _tl_restart()\n", what);
448 }
414} 449}
415 450
416 451
@@ -458,6 +493,38 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
458} 493}
459 494
460/** 495/**
496 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
497 * @mdev: DRBD device.
498 */
499void tl_abort_disk_io(struct drbd_conf *mdev)
500{
501 struct drbd_tl_epoch *b;
502 struct list_head *le, *tle;
503 struct drbd_request *req;
504
505 spin_lock_irq(&mdev->req_lock);
506 b = mdev->oldest_tle;
507 while (b) {
508 list_for_each_safe(le, tle, &b->requests) {
509 req = list_entry(le, struct drbd_request, tl_requests);
510 if (!(req->rq_state & RQ_LOCAL_PENDING))
511 continue;
512 _req_mod(req, abort_disk_io);
513 }
514 b = b->next;
515 }
516
517 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
518 req = list_entry(le, struct drbd_request, tl_requests);
519 if (!(req->rq_state & RQ_LOCAL_PENDING))
520 continue;
521 _req_mod(req, abort_disk_io);
522 }
523
524 spin_unlock_irq(&mdev->req_lock);
525}
526
527/**
461 * cl_wide_st_chg() - true if the state change is a cluster wide one 528 * cl_wide_st_chg() - true if the state change is a cluster wide one
462 * @mdev: DRBD device. 529 * @mdev: DRBD device.
463 * @os: old (current) state. 530 * @os: old (current) state.
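tl_abort_disk_io() walks every epoch still sitting in the transfer log plus the new barrier_acked_requests list, and feeds abort_disk_io only to requests whose local disk I/O is still pending (RQ_LOCAL_PENDING). It is called from after_state_ch() further down as soon as the disk reaches D_FAILED, so application I/O that is only waiting for the broken local disk can complete right away instead of hanging until D_DISKLESS. (Note that in the _tl_restart() hunk above, the fail_frozen_disk_io/restart_frozen_disk_io cases intentionally fall through to the following break.) The skip-unless-pending filter in isolation, written against a hypothetical request type since the real work happens inside _req_mod():

    #include <linux/list.h>

    struct xreq { struct list_head tl_requests; unsigned long rq_state; };
    #define XRQ_LOCAL_PENDING (1UL << 0)

    static void abort_pending_local_io(struct list_head *head,
                                       void (*abort_one)(struct xreq *))
    {
            struct xreq *req, *tmp;

            list_for_each_entry_safe(req, tmp, head, tl_requests) {
                    if (!(req->rq_state & XRQ_LOCAL_PENDING))
                            continue;       /* local disk I/O already finished */
                    abort_one(req);         /* e.g. complete upwards with an error */
            }
    }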
@@ -470,7 +537,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
470 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || 537 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
471 (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || 538 (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
472 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || 539 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
473 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || 540 (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
474 (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || 541 (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
475 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); 542 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
476} 543}
@@ -509,8 +576,16 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
509static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, 576static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
510 union drbd_state, 577 union drbd_state,
511 union drbd_state); 578 union drbd_state);
579enum sanitize_state_warnings {
580 NO_WARNING,
581 ABORTED_ONLINE_VERIFY,
582 ABORTED_RESYNC,
583 CONNECTION_LOST_NEGOTIATING,
584 IMPLICITLY_UPGRADED_DISK,
585 IMPLICITLY_UPGRADED_PDSK,
586};
512static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, 587static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
513 union drbd_state ns, const char **warn_sync_abort); 588 union drbd_state ns, enum sanitize_state_warnings *warn);
514int drbd_send_state_req(struct drbd_conf *, 589int drbd_send_state_req(struct drbd_conf *,
515 union drbd_state, union drbd_state); 590 union drbd_state, union drbd_state);
516 591
@@ -785,6 +860,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
785 if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) 860 if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
786 rv = SS_IN_TRANSIENT_STATE; 861 rv = SS_IN_TRANSIENT_STATE;
787 862
863 /* While establishing a connection only allow cstate to change.
864 Delay/refuse role changes, detach attach etc... */
865 if (test_bit(STATE_SENT, &mdev->flags) &&
866 !(os.conn == C_WF_REPORT_PARAMS ||
867 (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
868 rv = SS_IN_TRANSIENT_STATE;
869
788 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) 870 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
789 rv = SS_NEED_CONNECTION; 871 rv = SS_NEED_CONNECTION;
790 872
@@ -803,6 +885,21 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
803 return rv; 885 return rv;
804} 886}
805 887
888static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
889{
890 static const char *msg_table[] = {
891 [NO_WARNING] = "",
892 [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
893 [ABORTED_RESYNC] = "Resync aborted.",
894 [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
895 [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
896 [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
897 };
898
899 if (warn != NO_WARNING)
900 dev_warn(DEV, "%s\n", msg_table[warn]);
901}
902
806/** 903/**
807 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition 904 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
808 * @mdev: DRBD device. 905 * @mdev: DRBD device.
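sanitize_state() no longer hands back a free-form warning string; it reports one of the sanitize_state_warnings codes and print_sanitize_warnings() turns the code into text via a designated-initializer table, so __drbd_set_state() prints at most one warning per transition and the texts live in one place. The same lookup pattern in miniature, with made-up names:

    #include <linux/printk.h>

    enum demo_warning { DW_NONE, DW_ABORTED, DW_LOST, DW_MAX };

    static void print_demo_warning(enum demo_warning w)
    {
            static const char *tab[DW_MAX] = {
                    [DW_NONE]    = "",
                    [DW_ABORTED] = "operation aborted",
                    [DW_LOST]    = "connection lost",
            };

            if (w != DW_NONE)
                    pr_warn("%s\n", tab[w]);
    }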
@@ -814,11 +911,14 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
814 * to D_UNKNOWN. This rule and many more along those lines are in this function. 911 * to D_UNKNOWN. This rule and many more along those lines are in this function.
815 */ 912 */
816static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, 913static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
817 union drbd_state ns, const char **warn_sync_abort) 914 union drbd_state ns, enum sanitize_state_warnings *warn)
818{ 915{
819 enum drbd_fencing_p fp; 916 enum drbd_fencing_p fp;
820 enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; 917 enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
821 918
919 if (warn)
920 *warn = NO_WARNING;
921
822 fp = FP_DONT_CARE; 922 fp = FP_DONT_CARE;
823 if (get_ldev(mdev)) { 923 if (get_ldev(mdev)) {
824 fp = mdev->ldev->dc.fencing; 924 fp = mdev->ldev->dc.fencing;
@@ -833,18 +933,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
833 /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. 933 /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
834 * If you try to go into some Sync* state, that shall fail (elsewhere). */ 934 * If you try to go into some Sync* state, that shall fail (elsewhere). */
835 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && 935 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
836 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) 936 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
837 ns.conn = os.conn; 937 ns.conn = os.conn;
838 938
839 /* we cannot fail (again) if we already detached */ 939 /* we cannot fail (again) if we already detached */
840 if (ns.disk == D_FAILED && os.disk == D_DISKLESS) 940 if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
841 ns.disk = D_DISKLESS; 941 ns.disk = D_DISKLESS;
842 942
843 /* if we are only D_ATTACHING yet,
844 * we can (and should) go directly to D_DISKLESS. */
845 if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
846 ns.disk = D_DISKLESS;
847
848 /* After C_DISCONNECTING only C_STANDALONE may follow */ 943 /* After C_DISCONNECTING only C_STANDALONE may follow */
849 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) 944 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
850 ns.conn = os.conn; 945 ns.conn = os.conn;
@@ -863,10 +958,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
863 /* Abort resync if a disk fails/detaches */ 958 /* Abort resync if a disk fails/detaches */
864 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && 959 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
865 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { 960 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
866 if (warn_sync_abort) 961 if (warn)
867 *warn_sync_abort = 962 *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
868 os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? 963 ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
869 "Online-verify" : "Resync";
870 ns.conn = C_CONNECTED; 964 ns.conn = C_CONNECTED;
871 } 965 }
872 966
@@ -877,7 +971,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
877 ns.disk = mdev->new_state_tmp.disk; 971 ns.disk = mdev->new_state_tmp.disk;
878 ns.pdsk = mdev->new_state_tmp.pdsk; 972 ns.pdsk = mdev->new_state_tmp.pdsk;
879 } else { 973 } else {
880 dev_alert(DEV, "Connection lost while negotiating, no data!\n"); 974 if (warn)
975 *warn = CONNECTION_LOST_NEGOTIATING;
881 ns.disk = D_DISKLESS; 976 ns.disk = D_DISKLESS;
882 ns.pdsk = D_UNKNOWN; 977 ns.pdsk = D_UNKNOWN;
883 } 978 }
@@ -959,16 +1054,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
959 ns.disk = disk_max; 1054 ns.disk = disk_max;
960 1055
961 if (ns.disk < disk_min) { 1056 if (ns.disk < disk_min) {
962 dev_warn(DEV, "Implicitly set disk from %s to %s\n", 1057 if (warn)
963 drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); 1058 *warn = IMPLICITLY_UPGRADED_DISK;
964 ns.disk = disk_min; 1059 ns.disk = disk_min;
965 } 1060 }
966 if (ns.pdsk > pdsk_max) 1061 if (ns.pdsk > pdsk_max)
967 ns.pdsk = pdsk_max; 1062 ns.pdsk = pdsk_max;
968 1063
969 if (ns.pdsk < pdsk_min) { 1064 if (ns.pdsk < pdsk_min) {
970 dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", 1065 if (warn)
971 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); 1066 *warn = IMPLICITLY_UPGRADED_PDSK;
972 ns.pdsk = pdsk_min; 1067 ns.pdsk = pdsk_min;
973 } 1068 }
974 1069
@@ -1045,12 +1140,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1045{ 1140{
1046 union drbd_state os; 1141 union drbd_state os;
1047 enum drbd_state_rv rv = SS_SUCCESS; 1142 enum drbd_state_rv rv = SS_SUCCESS;
1048 const char *warn_sync_abort = NULL; 1143 enum sanitize_state_warnings ssw;
1049 struct after_state_chg_work *ascw; 1144 struct after_state_chg_work *ascw;
1050 1145
1051 os = mdev->state; 1146 os = mdev->state;
1052 1147
1053 ns = sanitize_state(mdev, os, ns, &warn_sync_abort); 1148 ns = sanitize_state(mdev, os, ns, &ssw);
1054 1149
1055 if (ns.i == os.i) 1150 if (ns.i == os.i)
1056 return SS_NOTHING_TO_DO; 1151 return SS_NOTHING_TO_DO;
@@ -1076,8 +1171,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1076 return rv; 1171 return rv;
1077 } 1172 }
1078 1173
1079 if (warn_sync_abort) 1174 print_sanitize_warnings(mdev, ssw);
1080 dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
1081 1175
1082 { 1176 {
1083 char *pbp, pb[300]; 1177 char *pbp, pb[300];
@@ -1243,7 +1337,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1243 drbd_thread_stop_nowait(&mdev->receiver); 1337 drbd_thread_stop_nowait(&mdev->receiver);
1244 1338
1245 /* Upon network failure, we need to restart the receiver. */ 1339 /* Upon network failure, we need to restart the receiver. */
1246 if (os.conn > C_TEAR_DOWN && 1340 if (os.conn > C_WF_CONNECTION &&
1247 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) 1341 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1248 drbd_thread_restart_nowait(&mdev->receiver); 1342 drbd_thread_restart_nowait(&mdev->receiver);
1249 1343
@@ -1251,6 +1345,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1251 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) 1345 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1252 drbd_resume_al(mdev); 1346 drbd_resume_al(mdev);
1253 1347
1348 /* remember last connect and attach times so request_timer_fn() won't
1349 * kill newly established sessions while we are still trying to thaw
1350 * previously frozen IO */
1351 if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
1352 mdev->last_reconnect_jif = jiffies;
1353 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1354 ns.disk > D_NEGOTIATING)
1355 mdev->last_reattach_jif = jiffies;
1356
1254 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); 1357 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1255 if (ascw) { 1358 if (ascw) {
1256 ascw->os = os; 1359 ascw->os = os;
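The two new timestamps record when the connection reached C_WF_REPORT_PARAMS and when a disk finished attaching or negotiating. Per the comment they exist so request_timer_fn() does not kill a session that was only just (re-)established while previously frozen I/O is still being thawed. The timer function itself is not part of this hunk; a hedged sketch of the kind of check it presumably performs, where the timeout source and helper name are assumptions:

    #include <linux/jiffies.h>

    /* measure the timeout from the newer of "request submitted" and
     * "connection re-established", so requests that pre-date a reconnect
     * do not immediately trip the timer */
    static bool peer_ack_overdue_sketch(struct drbd_conf *mdev,
                                        unsigned long oldest_submit_jif,
                                        unsigned long timeout_jif)
    {
            unsigned long ref =
                    time_after(mdev->last_reconnect_jif, oldest_submit_jif) ?
                    mdev->last_reconnect_jif : oldest_submit_jif;

            return time_after(jiffies, ref + timeout_jif);
    }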
@@ -1354,12 +1457,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1354 /* Here we have the actions that are performed after a 1457 /* Here we have the actions that are performed after a
1355 state change. This function might sleep */ 1458 state change. This function might sleep */
1356 1459
1460 if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
1461 mod_timer(&mdev->request_timer, jiffies + HZ);
1462
1357 nsm.i = -1; 1463 nsm.i = -1;
1358 if (ns.susp_nod) { 1464 if (ns.susp_nod) {
1359 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) 1465 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1360 what = resend; 1466 what = resend;
1361 1467
1362 if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) 1468 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1469 ns.disk > D_NEGOTIATING)
1363 what = restart_frozen_disk_io; 1470 what = restart_frozen_disk_io;
1364 1471
1365 if (what != nothing) 1472 if (what != nothing)
@@ -1408,7 +1515,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1408 /* Do not change the order of the if above and the two below... */ 1515 /* Do not change the order of the if above and the two below... */
1409 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ 1516 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1410 drbd_send_uuids(mdev); 1517 drbd_send_uuids(mdev);
1411 drbd_send_state(mdev); 1518 drbd_send_state(mdev, ns);
1412 } 1519 }
1413 /* No point in queuing send_bitmap if we don't have a connection 1520 /* No point in queuing send_bitmap if we don't have a connection
1414 * anymore, so check also the _current_ state, not only the new state 1521 * anymore, so check also the _current_ state, not only the new state
@@ -1441,11 +1548,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1441 } 1548 }
1442 1549
1443 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { 1550 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
1444 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { 1551 if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
1552 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1445 drbd_uuid_new_current(mdev); 1553 drbd_uuid_new_current(mdev);
1446 drbd_send_uuids(mdev); 1554 drbd_send_uuids(mdev);
1447 } 1555 }
1448
1449 /* D_DISKLESS Peer becomes secondary */ 1556 /* D_DISKLESS Peer becomes secondary */
1450 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1557 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1451 /* We may still be Primary ourselves. 1558 /* We may still be Primary ourselves.
@@ -1473,14 +1580,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1473 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { 1580 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1474 drbd_send_sizes(mdev, 0, 0); /* to start sync... */ 1581 drbd_send_sizes(mdev, 0, 0); /* to start sync... */
1475 drbd_send_uuids(mdev); 1582 drbd_send_uuids(mdev);
1476 drbd_send_state(mdev); 1583 drbd_send_state(mdev, ns);
1477 } 1584 }
1478 1585
1479 /* We want to pause/continue resync, tell peer. */ 1586 /* We want to pause/continue resync, tell peer. */
1480 if (ns.conn >= C_CONNECTED && 1587 if (ns.conn >= C_CONNECTED &&
1481 ((os.aftr_isp != ns.aftr_isp) || 1588 ((os.aftr_isp != ns.aftr_isp) ||
1482 (os.user_isp != ns.user_isp))) 1589 (os.user_isp != ns.user_isp)))
1483 drbd_send_state(mdev); 1590 drbd_send_state(mdev, ns);
1484 1591
1485 /* In case one of the isp bits got set, suspend other devices. */ 1592 /* In case one of the isp bits got set, suspend other devices. */
1486 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && 1593 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1490,10 +1597,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1490 /* Make sure the peer gets informed about eventual state 1597 /* Make sure the peer gets informed about eventual state
1491 changes (ISP bits) while we were in WFReportParams. */ 1598 changes (ISP bits) while we were in WFReportParams. */
1492 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) 1599 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1493 drbd_send_state(mdev); 1600 drbd_send_state(mdev, ns);
1494 1601
1495 if (os.conn != C_AHEAD && ns.conn == C_AHEAD) 1602 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1496 drbd_send_state(mdev); 1603 drbd_send_state(mdev, ns);
1497 1604
1498 /* We are in the progress to start a full sync... */ 1605 /* We are in the progress to start a full sync... */
1499 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || 1606 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1513,33 +1620,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1513 /* first half of local IO error, failure to attach, 1620 /* first half of local IO error, failure to attach,
1514 * or administrative detach */ 1621 * or administrative detach */
1515 if (os.disk != D_FAILED && ns.disk == D_FAILED) { 1622 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1516 enum drbd_io_error_p eh; 1623 enum drbd_io_error_p eh = EP_PASS_ON;
1517 int was_io_error; 1624 int was_io_error = 0;
1518 /* corresponding get_ldev was in __drbd_set_state, to serialize 1625 /* corresponding get_ldev was in __drbd_set_state, to serialize
1519 * our cleanup here with the transition to D_DISKLESS, 1626 * our cleanup here with the transition to D_DISKLESS.
1520 * so it is safe to dreference ldev here. */ 1627 * But it is still not safe to dereference ldev here, since
1521 eh = mdev->ldev->dc.on_io_error; 1628 * we might come from a failed Attach before ldev was set. */
1522 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); 1629 if (mdev->ldev) {
1523 1630 eh = mdev->ldev->dc.on_io_error;
1524 /* current state still has to be D_FAILED, 1631 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
1525 * there is only one way out: to D_DISKLESS, 1632
1526 * and that may only happen after our put_ldev below. */ 1633 /* Immediately allow completion of all application IO, that waits
1527 if (mdev->state.disk != D_FAILED) 1634 for completion from the local disk. */
1528 dev_err(DEV, 1635 tl_abort_disk_io(mdev);
1529 "ASSERT FAILED: disk is %s during detach\n", 1636
1530 drbd_disk_str(mdev->state.disk)); 1637 /* current state still has to be D_FAILED,
1531 1638 * there is only one way out: to D_DISKLESS,
1532 if (drbd_send_state(mdev)) 1639 * and that may only happen after our put_ldev below. */
1533 dev_warn(DEV, "Notified peer that I am detaching my disk\n"); 1640 if (mdev->state.disk != D_FAILED)
1534 else 1641 dev_err(DEV,
1535 dev_err(DEV, "Sending state for detaching disk failed\n"); 1642 "ASSERT FAILED: disk is %s during detach\n",
1536 1643 drbd_disk_str(mdev->state.disk));
1537 drbd_rs_cancel_all(mdev); 1644
1538 1645 if (ns.conn >= C_CONNECTED)
1539 /* In case we want to get something to stable storage still, 1646 drbd_send_state(mdev, ns);
1540 * this may be the last chance. 1647
1541 * Following put_ldev may transition to D_DISKLESS. */ 1648 drbd_rs_cancel_all(mdev);
1542 drbd_md_sync(mdev); 1649
1650 /* In case we want to get something to stable storage still,
1651 * this may be the last chance.
1652 * Following put_ldev may transition to D_DISKLESS. */
1653 drbd_md_sync(mdev);
1654 }
1543 put_ldev(mdev); 1655 put_ldev(mdev);
1544 1656
1545 if (was_io_error && eh == EP_CALL_HELPER) 1657 if (was_io_error && eh == EP_CALL_HELPER)
@@ -1561,16 +1673,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1561 mdev->rs_failed = 0; 1673 mdev->rs_failed = 0;
1562 atomic_set(&mdev->rs_pending_cnt, 0); 1674 atomic_set(&mdev->rs_pending_cnt, 0);
1563 1675
1564 if (drbd_send_state(mdev)) 1676 if (ns.conn >= C_CONNECTED)
1565 dev_warn(DEV, "Notified peer that I'm now diskless.\n"); 1677 drbd_send_state(mdev, ns);
1678
1566 /* corresponding get_ldev in __drbd_set_state 1679 /* corresponding get_ldev in __drbd_set_state
1567 * this may finally trigger drbd_ldev_destroy. */ 1680 * this may finally trigger drbd_ldev_destroy. */
1568 put_ldev(mdev); 1681 put_ldev(mdev);
1569 } 1682 }
1570 1683
1571 /* Notify peer that I had a local IO error, and did not detached.. */ 1684 /* Notify peer that I had a local IO error, and did not detached.. */
1572 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) 1685 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
1573 drbd_send_state(mdev); 1686 drbd_send_state(mdev, ns);
1574 1687
1575 /* Disks got bigger while they were detached */ 1688 /* Disks got bigger while they were detached */
1576 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && 1689 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1588,7 +1701,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1588 /* sync target done with resync. Explicitly notify peer, even though 1701 /* sync target done with resync. Explicitly notify peer, even though
1589 * it should (at least for non-empty resyncs) already know itself. */ 1702 * it should (at least for non-empty resyncs) already know itself. */
1590 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) 1703 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1591 drbd_send_state(mdev); 1704 drbd_send_state(mdev, ns);
1705
1706 /* Wake up role changes, that were delayed because of connection establishing */
1707 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
1708 clear_bit(STATE_SENT, &mdev->flags);
1709 wake_up(&mdev->state_wait);
1710 }
1592 1711
1593 /* This triggers bitmap writeout of potentially still unwritten pages 1712 /* This triggers bitmap writeout of potentially still unwritten pages
1594 * if the resync finished cleanly, or aborted because of peer disk 1713 * if the resync finished cleanly, or aborted because of peer disk
@@ -1598,8 +1717,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1598 * No harm done if some bits change during this phase. 1717 * No harm done if some bits change during this phase.
1599 */ 1718 */
1600 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { 1719 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1601 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, 1720 drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
1602 "write from resync_finished", BM_LOCKED_SET_ALLOWED); 1721 "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
1603 put_ldev(mdev); 1722 put_ldev(mdev);
1604 } 1723 }
1605 1724
@@ -2057,7 +2176,11 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
2057 2176
2058 D_ASSERT(mdev->state.disk == D_UP_TO_DATE); 2177 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2059 2178
2060 uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; 2179 uuid = mdev->ldev->md.uuid[UI_BITMAP];
2180 if (uuid && uuid != UUID_JUST_CREATED)
2181 uuid = uuid + UUID_NEW_BM_OFFSET;
2182 else
2183 get_random_bytes(&uuid, sizeof(u64));
2061 drbd_uuid_set(mdev, UI_BITMAP, uuid); 2184 drbd_uuid_set(mdev, UI_BITMAP, uuid);
2062 drbd_print_uuids(mdev, "updated sync UUID"); 2185 drbd_print_uuids(mdev, "updated sync UUID");
2063 drbd_md_sync(mdev); 2186 drbd_md_sync(mdev);
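drbd_gen_and_send_sync_uuid() used to add UUID_NEW_BM_OFFSET to the bitmap UUID unconditionally; if that UUID was still 0 or the UUID_JUST_CREATED marker, the result was a predictable, essentially meaningless value. The patch derives the new UUID from the old one only when a real one exists, and otherwise pulls 64 random bits. The guard in isolation (helper written here just for illustration):

    #include <linux/types.h>
    #include <linux/random.h>

    static u64 next_bitmap_uuid(u64 current_bm_uuid)
    {
            u64 uuid = current_bm_uuid;

            if (uuid && uuid != UUID_JUST_CREATED)
                    uuid += UUID_NEW_BM_OFFSET;            /* derive from the old UUID */
            else
                    get_random_bytes(&uuid, sizeof(u64));  /* nothing to derive from */
            return uuid;
    }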
@@ -2089,6 +2212,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
2089 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ 2212 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
2090 } 2213 }
2091 2214
2215 /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
2216 if (mdev->agreed_pro_version <= 94)
2217 max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
2218
2092 p.d_size = cpu_to_be64(d_size); 2219 p.d_size = cpu_to_be64(d_size);
2093 p.u_size = cpu_to_be64(u_size); 2220 p.u_size = cpu_to_be64(u_size);
2094 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); 2221 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
@@ -2102,10 +2229,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
2102} 2229}
2103 2230
2104/** 2231/**
2105 * drbd_send_state() - Sends the drbd state to the peer 2232 * drbd_send_current_state() - Sends the drbd state to the peer
2106 * @mdev: DRBD device. 2233 * @mdev: DRBD device.
2107 */ 2234 */
2108int drbd_send_state(struct drbd_conf *mdev) 2235int drbd_send_current_state(struct drbd_conf *mdev)
2109{ 2236{
2110 struct socket *sock; 2237 struct socket *sock;
2111 struct p_state p; 2238 struct p_state p;
@@ -2131,6 +2258,37 @@ int drbd_send_state(struct drbd_conf *mdev)
2131 return ok; 2258 return ok;
2132} 2259}
2133 2260
2261/**
2262 * drbd_send_state() - After a state change, sends the new state to the peer
2263 * @mdev: DRBD device.
2264 * @state: the state to send, not necessarily the current state.
2265 *
2266 * Each state change queues an "after_state_ch" work, which will eventually
2267 * send the resulting new state to the peer. If more state changes happen
2268 * between queuing and processing of the after_state_ch work, we still
2269 * want to send each intermediary state in the order it occurred.
2270 */
2271int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
2272{
2273 struct socket *sock;
2274 struct p_state p;
2275 int ok = 0;
2276
2277 mutex_lock(&mdev->data.mutex);
2278
2279 p.state = cpu_to_be32(state.i);
2280 sock = mdev->data.socket;
2281
2282 if (likely(sock != NULL)) {
2283 ok = _drbd_send_cmd(mdev, sock, P_STATE,
2284 (struct p_header80 *)&p, sizeof(p), 0);
2285 }
2286
2287 mutex_unlock(&mdev->data.mutex);
2288
2289 return ok;
2290}
2291
2134int drbd_send_state_req(struct drbd_conf *mdev, 2292int drbd_send_state_req(struct drbd_conf *mdev,
2135 union drbd_state mask, union drbd_state val) 2293 union drbd_state mask, union drbd_state val)
2136{ 2294{
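after_state_ch() runs from a queued work item, so by the time it executes, mdev->state may already reflect a later transition; sending the current state from there (what the old single-argument drbd_send_state() did) could skip or reorder intermediate states. The renamed drbd_send_current_state() keeps the old behaviour for callers that really do want "whatever the state is right now" (connect, role change, receive_state), while the new drbd_send_state() takes the ns computed for one particular transition. That is also why the after_state_ch() call sites above now guard the send with ns.conn >= C_CONNECTED. The two use cases side by side, assuming only the declarations above:

    /* after a specific transition, report exactly the state of that transition,
     * and only if that state still has a usable connection */
    static void report_transition(struct drbd_conf *mdev, union drbd_state ns)
    {
            if (ns.conn >= C_CONNECTED)
                    drbd_send_state(mdev, ns);
    }

    /* during the initial handshake, the freshest state is the right thing */
    static void report_now(struct drbd_conf *mdev)
    {
            drbd_send_current_state(mdev);
    }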
@@ -2615,7 +2773,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2615 struct bio_vec *bvec; 2773 struct bio_vec *bvec;
2616 int i; 2774 int i;
2617 /* hint all but last page with MSG_MORE */ 2775 /* hint all but last page with MSG_MORE */
2618 __bio_for_each_segment(bvec, bio, i, 0) { 2776 bio_for_each_segment(bvec, bio, i) {
2619 if (!_drbd_no_send_page(mdev, bvec->bv_page, 2777 if (!_drbd_no_send_page(mdev, bvec->bv_page,
2620 bvec->bv_offset, bvec->bv_len, 2778 bvec->bv_offset, bvec->bv_len,
2621 i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) 2779 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
@@ -2629,7 +2787,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2629 struct bio_vec *bvec; 2787 struct bio_vec *bvec;
2630 int i; 2788 int i;
2631 /* hint all but last page with MSG_MORE */ 2789 /* hint all but last page with MSG_MORE */
2632 __bio_for_each_segment(bvec, bio, i, 0) { 2790 bio_for_each_segment(bvec, bio, i) {
2633 if (!_drbd_send_page(mdev, bvec->bv_page, 2791 if (!_drbd_send_page(mdev, bvec->bv_page,
2634 bvec->bv_offset, bvec->bv_len, 2792 bvec->bv_offset, bvec->bv_len,
2635 i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) 2793 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
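__bio_for_each_segment(bvec, bio, i, 0) iterated from a hard-coded index 0; the plain bio_for_each_segment(bvec, bio, i) form starts at bio->bi_idx, i.e. at the segment the bio currently points to, and is the iterator the block layer kept going forward. For these send paths the result is the same unless the bio has been partially advanced. Typical 3.5-era usage, nothing DRBD-specific assumed:

    #include <linux/bio.h>

    static unsigned int bio_payload_bytes(struct bio *bio)
    {
            struct bio_vec *bvec;
            unsigned int bytes = 0;
            int i;

            bio_for_each_segment(bvec, bio, i)      /* walks bi_idx .. bi_vcnt - 1 */
                    bytes += bvec->bv_len;
            return bytes;
    }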
@@ -2695,8 +2853,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2695 2853
2696 p.sector = cpu_to_be64(req->sector); 2854 p.sector = cpu_to_be64(req->sector);
2697 p.block_id = (unsigned long)req; 2855 p.block_id = (unsigned long)req;
2698 p.seq_num = cpu_to_be32(req->seq_num = 2856 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2699 atomic_add_return(1, &mdev->packet_seq));
2700 2857
2701 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); 2858 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2702 2859
@@ -2987,8 +3144,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2987 atomic_set(&mdev->rs_sect_in, 0); 3144 atomic_set(&mdev->rs_sect_in, 0);
2988 atomic_set(&mdev->rs_sect_ev, 0); 3145 atomic_set(&mdev->rs_sect_ev, 0);
2989 atomic_set(&mdev->ap_in_flight, 0); 3146 atomic_set(&mdev->ap_in_flight, 0);
3147 atomic_set(&mdev->md_io_in_use, 0);
2990 3148
2991 mutex_init(&mdev->md_io_mutex);
2992 mutex_init(&mdev->data.mutex); 3149 mutex_init(&mdev->data.mutex);
2993 mutex_init(&mdev->meta.mutex); 3150 mutex_init(&mdev->meta.mutex);
2994 sema_init(&mdev->data.work.s, 0); 3151 sema_init(&mdev->data.work.s, 0);
@@ -3126,6 +3283,10 @@ static void drbd_destroy_mempools(void)
3126 3283
3127 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ 3284 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
3128 3285
3286 if (drbd_md_io_bio_set)
3287 bioset_free(drbd_md_io_bio_set);
3288 if (drbd_md_io_page_pool)
3289 mempool_destroy(drbd_md_io_page_pool);
3129 if (drbd_ee_mempool) 3290 if (drbd_ee_mempool)
3130 mempool_destroy(drbd_ee_mempool); 3291 mempool_destroy(drbd_ee_mempool);
3131 if (drbd_request_mempool) 3292 if (drbd_request_mempool)
@@ -3139,6 +3300,8 @@ static void drbd_destroy_mempools(void)
3139 if (drbd_al_ext_cache) 3300 if (drbd_al_ext_cache)
3140 kmem_cache_destroy(drbd_al_ext_cache); 3301 kmem_cache_destroy(drbd_al_ext_cache);
3141 3302
3303 drbd_md_io_bio_set = NULL;
3304 drbd_md_io_page_pool = NULL;
3142 drbd_ee_mempool = NULL; 3305 drbd_ee_mempool = NULL;
3143 drbd_request_mempool = NULL; 3306 drbd_request_mempool = NULL;
3144 drbd_ee_cache = NULL; 3307 drbd_ee_cache = NULL;
@@ -3162,6 +3325,8 @@ static int drbd_create_mempools(void)
3162 drbd_bm_ext_cache = NULL; 3325 drbd_bm_ext_cache = NULL;
3163 drbd_al_ext_cache = NULL; 3326 drbd_al_ext_cache = NULL;
3164 drbd_pp_pool = NULL; 3327 drbd_pp_pool = NULL;
3328 drbd_md_io_page_pool = NULL;
3329 drbd_md_io_bio_set = NULL;
3165 3330
3166 /* caches */ 3331 /* caches */
3167 drbd_request_cache = kmem_cache_create( 3332 drbd_request_cache = kmem_cache_create(
@@ -3185,6 +3350,16 @@ static int drbd_create_mempools(void)
3185 goto Enomem; 3350 goto Enomem;
3186 3351
3187 /* mempools */ 3352 /* mempools */
3353#ifdef COMPAT_HAVE_BIOSET_CREATE
3354 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
3355 if (drbd_md_io_bio_set == NULL)
3356 goto Enomem;
3357#endif
3358
3359 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
3360 if (drbd_md_io_page_pool == NULL)
3361 goto Enomem;
3362
3188 drbd_request_mempool = mempool_create(number, 3363 drbd_request_mempool = mempool_create(number,
3189 mempool_alloc_slab, mempool_free_slab, drbd_request_cache); 3364 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
3190 if (drbd_request_mempool == NULL) 3365 if (drbd_request_mempool == NULL)
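Two new pools back meta-data I/O: drbd_md_io_page_pool keeps DRBD_MIN_POOL_PAGES pages in reserve so a meta-data read or write can always find a page, and drbd_md_io_bio_set (created only when bioset_create() is available, hence the COMPAT_HAVE_BIOSET_CREATE guard) backs the bio_alloc_drbd() helper shown earlier; drbd_destroy_mempools() releases both and resets the pointers. mempool_create_page_pool(min_nr, order) is the stock kernel API; a minimal, DRBD-independent usage sketch:

    #include <linux/mempool.h>
    #include <linux/mm.h>
    #include <linux/gfp.h>
    #include <linux/errno.h>

    static mempool_t *page_pool;

    static int pool_setup(void)
    {
            page_pool = mempool_create_page_pool(16, 0);  /* 16 order-0 pages in reserve */
            return page_pool ? 0 : -ENOMEM;
    }

    static void pool_roundtrip(void)
    {
            struct page *page = mempool_alloc(page_pool, GFP_NOIO);

            if (page)                       /* may have come from the reserve */
                    mempool_free(page, page_pool);
    }

    static void pool_teardown(void)
    {
            if (page_pool)
                    mempool_destroy(page_pool);
    }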
@@ -3262,6 +3437,8 @@ static void drbd_delete_device(unsigned int minor)
3262 if (!mdev) 3437 if (!mdev)
3263 return; 3438 return;
3264 3439
3440 del_timer_sync(&mdev->request_timer);
3441
3265 /* paranoia asserts */ 3442 /* paranoia asserts */
3266 if (mdev->open_cnt != 0) 3443 if (mdev->open_cnt != 0)
3267 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, 3444 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
@@ -3666,8 +3843,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
3666 if (!get_ldev_if_state(mdev, D_FAILED)) 3843 if (!get_ldev_if_state(mdev, D_FAILED))
3667 return; 3844 return;
3668 3845
3669 mutex_lock(&mdev->md_io_mutex); 3846 buffer = drbd_md_get_buffer(mdev);
3670 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); 3847 if (!buffer)
3848 goto out;
3849
3671 memset(buffer, 0, 512); 3850 memset(buffer, 0, 512);
3672 3851
3673 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); 3852 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -3698,7 +3877,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
3698 * since we updated it on metadata. */ 3877 * since we updated it on metadata. */
3699 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); 3878 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
3700 3879
3701 mutex_unlock(&mdev->md_io_mutex); 3880 drbd_md_put_buffer(mdev);
3881out:
3702 put_ldev(mdev); 3882 put_ldev(mdev);
3703} 3883}
3704 3884
@@ -3718,8 +3898,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3718 if (!get_ldev_if_state(mdev, D_ATTACHING)) 3898 if (!get_ldev_if_state(mdev, D_ATTACHING))
3719 return ERR_IO_MD_DISK; 3899 return ERR_IO_MD_DISK;
3720 3900
3721 mutex_lock(&mdev->md_io_mutex); 3901 buffer = drbd_md_get_buffer(mdev);
3722 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); 3902 if (!buffer)
3903 goto out;
3723 3904
3724 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { 3905 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
3725 /* NOTE: can't do normal error processing here as this is 3906 /* NOTE: can't do normal error processing here as this is
@@ -3780,7 +3961,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3780 mdev->sync_conf.al_extents = 127; 3961 mdev->sync_conf.al_extents = 127;
3781 3962
3782 err: 3963 err:
3783 mutex_unlock(&mdev->md_io_mutex); 3964 drbd_md_put_buffer(mdev);
3965 out:
3784 put_ldev(mdev); 3966 put_ldev(mdev);
3785 3967
3786 return rv; 3968 return rv;
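The md_io_mutex is gone (see the drbd_init_set_defaults() hunk above, which replaces it with the md_io_in_use atomic). drbd_md_sync() and drbd_md_read() now obtain the single meta-data buffer through drbd_md_get_buffer()/drbd_md_put_buffer(), and unlike a mutex the get can fail, which is why both functions grew a goto out path; it also lets drbd_nl_detach() below grab the buffer to wait out in-flight meta-data I/O before forcing D_FAILED. The helpers themselves live outside this file; a sketch of the shape they presumably have, assuming the obvious single-owner scheme on md_io_in_use and the pre-existing md_io_page:

    /* sketch only, not the actual implementation */
    static void *md_get_buffer_sketch(struct drbd_conf *mdev)
    {
            bool owned;

            /* wait for the 0 -> 1 transition; give up if the disk failed meanwhile */
            wait_event(mdev->misc_wait,
                       (owned = (atomic_cmpxchg(&mdev->md_io_in_use, 0, 1) == 0)) ||
                       mdev->state.disk <= D_FAILED);

            return owned ? page_address(mdev->md_io_page) : NULL;
    }

    static void md_put_buffer_sketch(struct drbd_conf *mdev)
    {
            atomic_set(&mdev->md_io_in_use, 0);
            wake_up(&mdev->misc_wait);
    }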
@@ -4183,12 +4365,11 @@ const char *drbd_buildtag(void)
4183 static char buildtag[38] = "\0uilt-in"; 4365 static char buildtag[38] = "\0uilt-in";
4184 4366
4185 if (buildtag[0] == 0) { 4367 if (buildtag[0] == 0) {
4186#ifdef CONFIG_MODULES 4368#ifdef MODULE
4187 if (THIS_MODULE != NULL) 4369 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
4188 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); 4370#else
4189 else 4371 buildtag[0] = 'b';
4190#endif 4372#endif
4191 buildtag[0] = 'b';
4192 } 4373 }
4193 4374
4194 return buildtag; 4375 return buildtag;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index af2a25049bce..6d4de6a72e80 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
179 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); 179 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
180 180
181 drbd_bcast_ev_helper(mdev, cmd); 181 drbd_bcast_ev_helper(mdev, cmd);
182 ret = call_usermodehelper(usermode_helper, argv, envp, 1); 182 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
183 if (ret) 183 if (ret)
184 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", 184 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
185 usermode_helper, cmd, mb, 185 usermode_helper, cmd, mb,
@@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data)
289 */ 289 */
290 spin_lock_irq(&mdev->req_lock); 290 spin_lock_irq(&mdev->req_lock);
291 ns = mdev->state; 291 ns = mdev->state;
292 if (ns.conn < C_WF_REPORT_PARAMS) { 292 if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) {
293 ns.pdsk = nps; 293 ns.pdsk = nps;
294 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); 294 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
295 } 295 }
@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
432 /* if this was forced, we should consider sync */ 432 /* if this was forced, we should consider sync */
433 if (forced) 433 if (forced)
434 drbd_send_uuids(mdev); 434 drbd_send_uuids(mdev);
435 drbd_send_state(mdev); 435 drbd_send_current_state(mdev);
436 } 436 }
437 437
438 drbd_md_sync(mdev); 438 drbd_md_sync(mdev);
@@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
845 Because new from 8.3.8 onwards the peer can use multiple 845 Because new from 8.3.8 onwards the peer can use multiple
846 BIOs for a single peer_request */ 846 BIOs for a single peer_request */
847 if (mdev->state.conn >= C_CONNECTED) { 847 if (mdev->state.conn >= C_CONNECTED) {
848 if (mdev->agreed_pro_version < 94) 848 if (mdev->agreed_pro_version < 94) {
849 peer = mdev->peer_max_bio_size; 849 peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
850 else if (mdev->agreed_pro_version == 94) 850 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
851 } else if (mdev->agreed_pro_version == 94)
851 peer = DRBD_MAX_SIZE_H80_PACKET; 852 peer = DRBD_MAX_SIZE_H80_PACKET;
852 else /* drbd 8.3.8 onwards */ 853 else /* drbd 8.3.8 onwards */
853 peer = DRBD_MAX_BIO_SIZE; 854 peer = DRBD_MAX_BIO_SIZE;
@@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1032 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", 1033 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
1033 (unsigned long long) drbd_get_max_capacity(nbc), 1034 (unsigned long long) drbd_get_max_capacity(nbc),
1034 (unsigned long long) nbc->dc.disk_size); 1035 (unsigned long long) nbc->dc.disk_size);
1035 retcode = ERR_DISK_TO_SMALL; 1036 retcode = ERR_DISK_TOO_SMALL;
1036 goto fail; 1037 goto fail;
1037 } 1038 }
1038 1039
@@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1046 } 1047 }
1047 1048
1048 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { 1049 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1049 retcode = ERR_MD_DISK_TO_SMALL; 1050 retcode = ERR_MD_DISK_TOO_SMALL;
1050 dev_warn(DEV, "refusing attach: md-device too small, " 1051 dev_warn(DEV, "refusing attach: md-device too small, "
1051 "at least %llu sectors needed for this meta-disk type\n", 1052 "at least %llu sectors needed for this meta-disk type\n",
1052 (unsigned long long) min_md_device_sectors); 1053 (unsigned long long) min_md_device_sectors);
@@ -1057,7 +1058,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1057 * (we may currently be R_PRIMARY with no local disk...) */ 1058 * (we may currently be R_PRIMARY with no local disk...) */
1058 if (drbd_get_max_capacity(nbc) < 1059 if (drbd_get_max_capacity(nbc) <
1059 drbd_get_capacity(mdev->this_bdev)) { 1060 drbd_get_capacity(mdev->this_bdev)) {
1060 retcode = ERR_DISK_TO_SMALL; 1061 retcode = ERR_DISK_TOO_SMALL;
1061 goto fail; 1062 goto fail;
1062 } 1063 }
1063 1064
@@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1138 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && 1139 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1139 drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { 1140 drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
1140 dev_warn(DEV, "refusing to truncate a consistent device\n"); 1141 dev_warn(DEV, "refusing to truncate a consistent device\n");
1141 retcode = ERR_DISK_TO_SMALL; 1142 retcode = ERR_DISK_TOO_SMALL;
1142 goto force_diskless_dec; 1143 goto force_diskless_dec;
1143 } 1144 }
1144 1145
@@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1336{ 1337{
1337 enum drbd_ret_code retcode; 1338 enum drbd_ret_code retcode;
1338 int ret; 1339 int ret;
1340 struct detach dt = {};
1341
1342 if (!detach_from_tags(mdev, nlp->tag_list, &dt)) {
1343 reply->ret_code = ERR_MANDATORY_TAG;
1344 goto out;
1345 }
1346
1347 if (dt.detach_force) {
1348 drbd_force_state(mdev, NS(disk, D_FAILED));
1349 reply->ret_code = SS_SUCCESS;
1350 goto out;
1351 }
1352
1339 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ 1353 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1354 drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */
1340 retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); 1355 retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
1356 drbd_md_put_buffer(mdev);
1341 /* D_FAILED will transition to DISKLESS. */ 1357 /* D_FAILED will transition to DISKLESS. */
1342 ret = wait_event_interruptible(mdev->misc_wait, 1358 ret = wait_event_interruptible(mdev->misc_wait,
1343 mdev->state.disk != D_FAILED); 1359 mdev->state.disk != D_FAILED);
1344 drbd_resume_io(mdev); 1360 drbd_resume_io(mdev);
1361
1345 if ((int)retcode == (int)SS_IS_DISKLESS) 1362 if ((int)retcode == (int)SS_IS_DISKLESS)
1346 retcode = SS_NOTHING_TO_DO; 1363 retcode = SS_NOTHING_TO_DO;
1347 if (ret) 1364 if (ret)
1348 retcode = ERR_INTR; 1365 retcode = ERR_INTR;
1349 reply->ret_code = retcode; 1366 reply->ret_code = retcode;
1367out:
1350 return 0; 1368 return 0;
1351} 1369}
1352 1370
@@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1711 1729
1712 if (rs.no_resync && mdev->agreed_pro_version < 93) { 1730 if (rs.no_resync && mdev->agreed_pro_version < 93) {
1713 retcode = ERR_NEED_APV_93; 1731 retcode = ERR_NEED_APV_93;
1714 goto fail; 1732 goto fail_ldev;
1715 } 1733 }
1716 1734
1717 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) 1735 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
@@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1738 fail: 1756 fail:
1739 reply->ret_code = retcode; 1757 reply->ret_code = retcode;
1740 return 0; 1758 return 0;
1759
1760 fail_ldev:
1761 put_ldev(mdev);
1762 goto fail;
1741} 1763}
1742 1764
1743static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1765static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1941 1963
1942 /* If there is still bitmap IO pending, probably because of a previous 1964 /* If there is still bitmap IO pending, probably because of a previous
1943 * resync just being finished, wait for it before requesting a new resync. */ 1965 * resync just being finished, wait for it before requesting a new resync. */
1966 drbd_suspend_io(mdev);
1944 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 1967 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1945 1968
1946 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); 1969 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
@@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1959 1982
1960 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); 1983 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
1961 } 1984 }
1985 drbd_resume_io(mdev);
1962 1986
1963 reply->ret_code = retcode; 1987 reply->ret_code = retcode;
1964 return 0; 1988 return 0;
@@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
1980 2004
1981 /* If there is still bitmap IO pending, probably because of a previous 2005 /* If there is still bitmap IO pending, probably because of a previous
1982 * resync just being finished, wait for it before requesting a new resync. */ 2006 * resync just being finished, wait for it before requesting a new resync. */
2007 drbd_suspend_io(mdev);
1983 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 2008 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1984 2009
1985 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); 2010 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
@@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
1998 } else 2023 } else
1999 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); 2024 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
2000 } 2025 }
2026 drbd_resume_io(mdev);
2001 2027
2002 reply->ret_code = retcode; 2028 reply->ret_code = retcode;
2003 return 0; 2029 return 0;
@@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2170 2196
2171 /* If there is still bitmap IO pending, e.g. previous resync or verify 2197 /* If there is still bitmap IO pending, e.g. previous resync or verify
2172 * just being finished, wait for it before requesting a new resync. */ 2198 * just being finished, wait for it before requesting a new resync. */
2199 drbd_suspend_io(mdev);
2173 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 2200 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2174 2201
2175 /* w_make_ov_request expects position to be aligned */ 2202 /* w_make_ov_request expects position to be aligned */
2176 mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; 2203 mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
2177 reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); 2204 reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2205 drbd_resume_io(mdev);
2178 return 0; 2206 return 0;
2179} 2207}
2180 2208
@@ -2297,7 +2325,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
2297 return; 2325 return;
2298 } 2326 }
2299 2327
2300 if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) { 2328 if (!capable(CAP_SYS_ADMIN)) {
2301 retcode = ERR_PERM; 2329 retcode = ERR_PERM;
2302 goto fail; 2330 goto fail;
2303 } 2331 }
@@ -2526,10 +2554,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2526 2554
2527 page = e->pages; 2555 page = e->pages;
2528 page_chain_for_each(page) { 2556 page_chain_for_each(page) {
2529 void *d = kmap_atomic(page, KM_USER0); 2557 void *d = kmap_atomic(page);
2530 unsigned l = min_t(unsigned, len, PAGE_SIZE); 2558 unsigned l = min_t(unsigned, len, PAGE_SIZE);
2531 memcpy(tl, d, l); 2559 memcpy(tl, d, l);
2532 kunmap_atomic(d, KM_USER0); 2560 kunmap_atomic(d);
2533 tl = (unsigned short*)((char*)tl + l); 2561 tl = (unsigned short*)((char*)tl + l);
2534 len -= l; 2562 len -= l;
2535 if (len == 0) 2563 if (len == 0)
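kmap_atomic()/kunmap_atomic() no longer take a KM_USER0 slot argument; the km_type slots were removed from the kernel, so 3.5-era code uses the single-argument form. Current usage, nothing DRBD-specific:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void copy_from_page(void *dst, struct page *page, size_t len)
    {
            void *src = kmap_atomic(page);  /* no KM_* slot argument any more */

            memcpy(dst, src, len);
            kunmap_atomic(src);
    }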
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 2959cdfb77f5..869bada2ed06 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
52 if (unlikely(v >= 1000000)) { 52 if (unlikely(v >= 1000000)) {
53 /* cool: > GiByte/s */ 53 /* cool: > GiByte/s */
54 seq_printf(seq, "%ld,", v / 1000000); 54 seq_printf(seq, "%ld,", v / 1000000);
55 v /= 1000000; 55 v %= 1000000;
56 seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); 56 seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
57 } else if (likely(v >= 1000)) 57 } else if (likely(v >= 1000))
58 seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); 58 seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
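The throughput formatter had an arithmetic bug in its top branch: after printing the leading group it divided v by 1,000,000 instead of keeping the remainder, so everything after the first comma was wrong. For v = 1234567 the old code printed "1," and then, with v reduced to 1, "000,001", i.e. "1,000,001"; with v %= 1000000 the remainder 234567 is printed as "234,567", giving the intended "1,234,567". A userspace re-creation of the corrected logic:

    #include <stdio.h>

    static void print_grouped(long v)   /* assumes v >= 1000000 */
    {
            printf("%ld,", v / 1000000);
            v %= 1000000;
            printf("%03ld,%03ld\n", v / 1000, v % 1000);
    }

    /* print_grouped(1234567) -> "1,234,567"; with "v /= 1000000" it was "1,000,001" */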
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 43beaca53179..ea4836e0ae98 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what,
466 goto out; 466 goto out;
467 } 467 }
468 (*newsock)->ops = sock->ops; 468 (*newsock)->ops = sock->ops;
469 __module_get((*newsock)->ops->owner);
469 470
470out: 471out:
471 return err; 472 return err;
@@ -664,7 +665,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
664 timeo = mdev->net_conf->try_connect_int * HZ; 665 timeo = mdev->net_conf->try_connect_int * HZ;
665 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ 666 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
666 667
667 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ 668 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
668 s_listen->sk->sk_rcvtimeo = timeo; 669 s_listen->sk->sk_rcvtimeo = timeo;
669 s_listen->sk->sk_sndtimeo = timeo; 670 s_listen->sk->sk_sndtimeo = timeo;
670 drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, 671 drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
@@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev)
750{ 751{
751 struct socket *s, *sock, *msock; 752 struct socket *s, *sock, *msock;
752 int try, h, ok; 753 int try, h, ok;
754 enum drbd_state_rv rv;
753 755
754 D_ASSERT(!mdev->data.socket); 756 D_ASSERT(!mdev->data.socket);
755 757
@@ -841,8 +843,8 @@ retry:
841 } 843 }
842 } while (1); 844 } while (1);
843 845
844 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ 846 msock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
845 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ 847 sock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
846 848
847 sock->sk->sk_allocation = GFP_NOIO; 849 sock->sk->sk_allocation = GFP_NOIO;
848 msock->sk->sk_allocation = GFP_NOIO; 850 msock->sk->sk_allocation = GFP_NOIO;
@@ -888,25 +890,32 @@ retry:
888 } 890 }
889 } 891 }
890 892
891 if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
892 return 0;
893
894 sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; 893 sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
895 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 894 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
896 895
897 atomic_set(&mdev->packet_seq, 0); 896 atomic_set(&mdev->packet_seq, 0);
898 mdev->peer_seq = 0; 897 mdev->peer_seq = 0;
899 898
900 drbd_thread_start(&mdev->asender);
901
902 if (drbd_send_protocol(mdev) == -1) 899 if (drbd_send_protocol(mdev) == -1)
903 return -1; 900 return -1;
901 set_bit(STATE_SENT, &mdev->flags);
904 drbd_send_sync_param(mdev, &mdev->sync_conf); 902 drbd_send_sync_param(mdev, &mdev->sync_conf);
905 drbd_send_sizes(mdev, 0, 0); 903 drbd_send_sizes(mdev, 0, 0);
906 drbd_send_uuids(mdev); 904 drbd_send_uuids(mdev);
907 drbd_send_state(mdev); 905 drbd_send_current_state(mdev);
908 clear_bit(USE_DEGR_WFC_T, &mdev->flags); 906 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
909 clear_bit(RESIZE_PENDING, &mdev->flags); 907 clear_bit(RESIZE_PENDING, &mdev->flags);
908
909 spin_lock_irq(&mdev->req_lock);
910 rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL);
911 if (mdev->state.conn != C_WF_REPORT_PARAMS)
912 clear_bit(STATE_SENT, &mdev->flags);
913 spin_unlock_irq(&mdev->req_lock);
914
915 if (rv < SS_SUCCESS)
916 return 0;
917
918 drbd_thread_start(&mdev->asender);
910 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ 919 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
911 920
912 return 1; 921 return 1;
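The connect handshake is reordered: protocol, sync parameters, sizes, uuids and the current state go out first with the new STATE_SENT flag set, then the local state is switched to C_WF_REPORT_PARAMS under req_lock, and only when that succeeded are the asender thread and the request timer started; if some other state change slipped in, STATE_SENT is cleared again. Together with the is_valid_state_transition() hunk in drbd_main.c, STATE_SENT makes competing changes (role change, detach, the fencing path in _try_outdate_peer_async()) come back with SS_IN_TRANSIENT_STATE until the handshake settles, and the after_state_ch() hunk wakes mdev->state_wait once C_WF_REPORT_PARAMS is left. A sketch of how a delayed change might wait for that, assuming only the flag and wait queue shown in the diff:

    /* sketch: the real waiter sits in the generic state-change path */
    static void wait_for_handshake(struct drbd_conf *mdev)
    {
            wait_event(mdev->state_wait, !test_bit(STATE_SENT, &mdev->flags));
    }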
@@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev)
957 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, 966 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
958 NULL); 967 NULL);
959 if (rv) { 968 if (rv) {
960 dev_err(DEV, "local disk flush failed with status %d\n", rv); 969 dev_info(DEV, "local disk flush failed with status %d\n", rv);
961 /* would rather check on EOPNOTSUPP, but that is not reliable. 970 /* would rather check on EOPNOTSUPP, but that is not reliable.
962 * don't try again for ANY return value != 0 971 * don't try again for ANY return value != 0
963 * if (rv == -EOPNOTSUPP) */ 972 * if (rv == -EOPNOTSUPP) */
@@ -1001,13 +1010,14 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1001 1010
1002 if (epoch_size != 0 && 1011 if (epoch_size != 0 &&
1003 atomic_read(&epoch->active) == 0 && 1012 atomic_read(&epoch->active) == 0 &&
1004 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { 1013 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
1005 if (!(ev & EV_CLEANUP)) { 1014 if (!(ev & EV_CLEANUP)) {
1006 spin_unlock(&mdev->epoch_lock); 1015 spin_unlock(&mdev->epoch_lock);
1007 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); 1016 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1008 spin_lock(&mdev->epoch_lock); 1017 spin_lock(&mdev->epoch_lock);
1009 } 1018 }
1010 dec_unacked(mdev); 1019 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
1020 dec_unacked(mdev);
1011 1021
1012 if (mdev->current_epoch != epoch) { 1022 if (mdev->current_epoch != epoch) {
1013 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); 1023 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
@@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1096 /* In most cases, we will only need one bio. But in case the lower 1106 /* In most cases, we will only need one bio. But in case the lower
1097 * level restrictions happen to be different at this offset on this 1107 * level restrictions happen to be different at this offset on this
1098 * side than those of the sending peer, we may need to submit the 1108 * side than those of the sending peer, we may need to submit the
1099 * request in more than one bio. */ 1109 * request in more than one bio.
1110 *
1111 * Plain bio_alloc is good enough here, this is no DRBD internally
1112 * generated bio, but a bio allocated on behalf of the peer.
1113 */
1100next_bio: 1114next_bio:
1101 bio = bio_alloc(GFP_NOIO, nr_pages); 1115 bio = bio_alloc(GFP_NOIO, nr_pages);
1102 if (!bio) { 1116 if (!bio) {
@@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
1583 return ok; 1597 return ok;
1584} 1598}
1585 1599
1600static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e)
1601{
1602
1603 struct drbd_epoch_entry *rs_e;
1604 bool rv = 0;
1605
1606 spin_lock_irq(&mdev->req_lock);
1607 list_for_each_entry(rs_e, &mdev->sync_ee, w.list) {
1608 if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) {
1609 rv = 1;
1610 break;
1611 }
1612 }
1613 spin_unlock_irq(&mdev->req_lock);
1614
1615 return rv;
1616}
1617
1586/* Called from receive_Data. 1618/* Called from receive_Data.
1587 * Synchronize packets on sock with packets on msock. 1619 * Synchronize packets on sock with packets on msock.
1588 * 1620 *
@@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1826 list_add(&e->w.list, &mdev->active_ee); 1858 list_add(&e->w.list, &mdev->active_ee);
1827 spin_unlock_irq(&mdev->req_lock); 1859 spin_unlock_irq(&mdev->req_lock);
1828 1860
1861 if (mdev->state.conn == C_SYNC_TARGET)
1862 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e));
1863
1829 switch (mdev->net_conf->wire_protocol) { 1864 switch (mdev->net_conf->wire_protocol) {
1830 case DRBD_PROT_C: 1865 case DRBD_PROT_C:
1831 inc_unacked(mdev); 1866 inc_unacked(mdev);
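While this node is SyncTarget, an application write coming from the peer may hit sectors that an in-flight resync write on mdev->sync_ee also covers; if the resync write completed later it would overwrite the newer data. receive_Data() therefore blocks on ee_wait until overlapping_resync_write() no longer finds an overlapping entry. overlaps() is presumably a sector-range intersection helper from the DRBD headers; the test it has to perform looks roughly like this (lengths in bytes, 512-byte sectors assumed):

    #include <linux/types.h>

    static bool extents_overlap(sector_t s1, unsigned int len1,
                                sector_t s2, unsigned int len2)
    {
            sector_t e1 = s1 + (len1 >> 9);   /* exclusive end in sectors */
            sector_t e2 = s2 + (len2 >> 9);

            return s1 < e2 && s2 < e1;
    }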
@@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2420 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; 2455 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2421 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; 2456 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
2422 2457
2423 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); 2458 dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
2424 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); 2459 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2425 2460
2426 return -1; 2461 return -1;
@@ -2806,10 +2841,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2806 2841
2807 if (apv >= 88) { 2842 if (apv >= 88) {
2808 if (apv == 88) { 2843 if (apv == 88) {
2809 if (data_size > SHARED_SECRET_MAX) { 2844 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
2810 dev_err(DEV, "verify-alg too long, " 2845 dev_err(DEV, "verify-alg of wrong size, "
2811 "peer wants %u, accepting only %u byte\n", 2846 "peer wants %u, accepting only up to %u byte\n",
2812 data_size, SHARED_SECRET_MAX); 2847 data_size, SHARED_SECRET_MAX);
2813 return false; 2848 return false;
2814 } 2849 }
2815 2850
@@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3168 os = ns = mdev->state; 3203 os = ns = mdev->state;
3169 spin_unlock_irq(&mdev->req_lock); 3204 spin_unlock_irq(&mdev->req_lock);
3170 3205
3171 /* peer says his disk is uptodate, while we think it is inconsistent, 3206 /* If some other part of the code (asender thread, timeout)
3172 * and this happens while we think we have a sync going on. */ 3207 * already decided to close the connection again,
3173 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && 3208 * we must not "re-establish" it here. */
3209 if (os.conn <= C_TEAR_DOWN)
3210 return false;
3211
3212 /* If this is the "end of sync" confirmation, usually the peer disk
3213 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3214 * set) resync started in PausedSyncT, or if the timing of pause-/
3215 * unpause-sync events has been "just right", the peer disk may
3216 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3217 */
3218 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3219 real_peer_disk == D_UP_TO_DATE &&
3174 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { 3220 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3175 /* If we are (becoming) SyncSource, but peer is still in sync 3221 /* If we are (becoming) SyncSource, but peer is still in sync
3176 * preparation, ignore its uptodate-ness to avoid flapping, it 3222 * preparation, ignore its uptodate-ness to avoid flapping, it
@@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3288 /* Nowadays only used when forcing a node into primary role and 3334 /* Nowadays only used when forcing a node into primary role and
3289 setting its disk to UpToDate with that */ 3335 setting its disk to UpToDate with that */
3290 drbd_send_uuids(mdev); 3336 drbd_send_uuids(mdev);
3291 drbd_send_state(mdev); 3337 drbd_send_current_state(mdev);
3292 } 3338 }
3293 } 3339 }
3294 3340
@@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3776 if (mdev->state.conn == C_STANDALONE) 3822 if (mdev->state.conn == C_STANDALONE)
3777 return; 3823 return;
3778 3824
3825 /* We are about to start the cleanup after connection loss.
3826 * Make sure drbd_make_request knows about that.
3827 * Usually we should be in some network failure state already,
3828 * but just in case we are not, we fix it up here.
3829 */
3830 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
3831
3779 /* asender does not clean up anything. it must not interfere, either */ 3832 /* asender does not clean up anything. it must not interfere, either */
3780 drbd_thread_stop(&mdev->asender); 3833 drbd_thread_stop(&mdev->asender);
3781 drbd_free_sock(mdev); 3834 drbd_free_sock(mdev);
@@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3803 atomic_set(&mdev->rs_pending_cnt, 0); 3856 atomic_set(&mdev->rs_pending_cnt, 0);
3804 wake_up(&mdev->misc_wait); 3857 wake_up(&mdev->misc_wait);
3805 3858
3806 del_timer(&mdev->request_timer);
3807
3808 /* make sure syncer is stopped and w_resume_next_sg queued */ 3859 /* make sure syncer is stopped and w_resume_next_sg queued */
3809 del_timer_sync(&mdev->resync_timer); 3860 del_timer_sync(&mdev->resync_timer);
3810 resync_timer_fn((unsigned long)mdev); 3861 resync_timer_fn((unsigned long)mdev);
@@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
4433 4484
4434 if (mdev->state.conn == C_AHEAD && 4485 if (mdev->state.conn == C_AHEAD &&
4435 atomic_read(&mdev->ap_in_flight) == 0 && 4486 atomic_read(&mdev->ap_in_flight) == 0 &&
4436 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { 4487 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
4437 mdev->start_resync_timer.expires = jiffies + HZ; 4488 mdev->start_resync_timer.expires = jiffies + HZ;
4438 add_timer(&mdev->start_resync_timer); 4489 add_timer(&mdev->start_resync_timer);
4439 } 4490 }
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4a0f314086e5..9c5c84946b05 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -37,6 +37,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req
37 const int rw = bio_data_dir(bio); 37 const int rw = bio_data_dir(bio);
38 int cpu; 38 int cpu;
39 cpu = part_stat_lock(); 39 cpu = part_stat_lock();
40 part_round_stats(cpu, &mdev->vdisk->part0);
40 part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); 41 part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
41 part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); 42 part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
42 part_inc_in_flight(&mdev->vdisk->part0, rw); 43 part_inc_in_flight(&mdev->vdisk->part0, rw);
@@ -214,8 +215,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
214{ 215{
215 const unsigned long s = req->rq_state; 216 const unsigned long s = req->rq_state;
216 struct drbd_conf *mdev = req->mdev; 217 struct drbd_conf *mdev = req->mdev;
217 /* only WRITES may end up here without a master bio (on barrier ack) */ 218 int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
218 int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
219 219
220 /* we must not complete the master bio, while it is 220 /* we must not complete the master bio, while it is
221 * still being processed by _drbd_send_zc_bio (drbd_send_dblock) 221 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -230,7 +230,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
230 return; 230 return;
231 if (s & RQ_NET_PENDING) 231 if (s & RQ_NET_PENDING)
232 return; 232 return;
233 if (s & RQ_LOCAL_PENDING) 233 if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
234 return; 234 return;
235 235
236 if (req->master_bio) { 236 if (req->master_bio) {
@@ -277,6 +277,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
277 req->master_bio = NULL; 277 req->master_bio = NULL;
278 } 278 }
279 279
280 if (s & RQ_LOCAL_PENDING)
281 return;
282
280 if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { 283 if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
281 /* this is disconnected (local only) operation, 284 /* this is disconnected (local only) operation,
282 * or protocol C P_WRITE_ACK, 285 * or protocol C P_WRITE_ACK,
@@ -429,7 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
429 break; 432 break;
430 433
431 case completed_ok: 434 case completed_ok:
432 if (bio_data_dir(req->master_bio) == WRITE) 435 if (req->rq_state & RQ_WRITE)
433 mdev->writ_cnt += req->size>>9; 436 mdev->writ_cnt += req->size>>9;
434 else 437 else
435 mdev->read_cnt += req->size>>9; 438 mdev->read_cnt += req->size>>9;
@@ -438,7 +441,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
438 req->rq_state &= ~RQ_LOCAL_PENDING; 441 req->rq_state &= ~RQ_LOCAL_PENDING;
439 442
440 _req_may_be_done_not_susp(req, m); 443 _req_may_be_done_not_susp(req, m);
441 put_ldev(mdev); 444 break;
445
446 case abort_disk_io:
447 req->rq_state |= RQ_LOCAL_ABORTED;
448 if (req->rq_state & RQ_WRITE)
449 _req_may_be_done_not_susp(req, m);
450 else
451 goto goto_queue_for_net_read;
442 break; 452 break;
443 453
444 case write_completed_with_error: 454 case write_completed_with_error:
@@ -447,7 +457,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
447 457
448 __drbd_chk_io_error(mdev, false); 458 __drbd_chk_io_error(mdev, false);
449 _req_may_be_done_not_susp(req, m); 459 _req_may_be_done_not_susp(req, m);
450 put_ldev(mdev);
451 break; 460 break;
452 461
453 case read_ahead_completed_with_error: 462 case read_ahead_completed_with_error:
@@ -455,7 +464,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
455 req->rq_state |= RQ_LOCAL_COMPLETED; 464 req->rq_state |= RQ_LOCAL_COMPLETED;
456 req->rq_state &= ~RQ_LOCAL_PENDING; 465 req->rq_state &= ~RQ_LOCAL_PENDING;
457 _req_may_be_done_not_susp(req, m); 466 _req_may_be_done_not_susp(req, m);
458 put_ldev(mdev);
459 break; 467 break;
460 468
461 case read_completed_with_error: 469 case read_completed_with_error:
@@ -467,7 +475,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
467 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 475 D_ASSERT(!(req->rq_state & RQ_NET_MASK));
468 476
469 __drbd_chk_io_error(mdev, false); 477 __drbd_chk_io_error(mdev, false);
470 put_ldev(mdev); 478
479 goto_queue_for_net_read:
471 480
472 /* no point in retrying if there is no good remote data, 481 /* no point in retrying if there is no good remote data,
473 * or we have no connection. */ 482 * or we have no connection. */
@@ -556,10 +565,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
556 drbd_queue_work(&mdev->data.work, &req->w); 565 drbd_queue_work(&mdev->data.work, &req->w);
557 break; 566 break;
558 567
559 case oos_handed_to_network: 568 case read_retry_remote_canceled:
560 /* actually the same */
561 case send_canceled: 569 case send_canceled:
562 /* treat it the same */
563 case send_failed: 570 case send_failed:
564 /* real cleanup will be done from tl_clear. just update flags 571 /* real cleanup will be done from tl_clear. just update flags
565 * so it is no longer marked as on the worker queue */ 572 * so it is no longer marked as on the worker queue */
@@ -589,17 +596,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
589 } 596 }
590 req->rq_state &= ~RQ_NET_QUEUED; 597 req->rq_state &= ~RQ_NET_QUEUED;
591 req->rq_state |= RQ_NET_SENT; 598 req->rq_state |= RQ_NET_SENT;
592 /* because _drbd_send_zc_bio could sleep, and may want to
593 * dereference the bio even after the "write_acked_by_peer" and
594 * "completed_ok" events came in, once we return from
595 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
596 * whether it is done already, and end it. */
597 _req_may_be_done_not_susp(req, m); 599 _req_may_be_done_not_susp(req, m);
598 break; 600 break;
599 601
600 case read_retry_remote_canceled: 602 case oos_handed_to_network:
603 /* Was not set PENDING, no longer QUEUED, so is now DONE
604 * as far as this connection is concerned. */
601 req->rq_state &= ~RQ_NET_QUEUED; 605 req->rq_state &= ~RQ_NET_QUEUED;
602 /* fall through, in case we raced with drbd_disconnect */ 606 req->rq_state |= RQ_NET_DONE;
607 _req_may_be_done_not_susp(req, m);
608 break;
609
603 case connection_lost_while_pending: 610 case connection_lost_while_pending:
604 /* transfer log cleanup after connection loss */ 611 /* transfer log cleanup after connection loss */
605 /* assert something? */ 612 /* assert something? */
@@ -616,8 +623,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
616 _req_may_be_done(req, m); /* Allowed while state.susp */ 623 _req_may_be_done(req, m); /* Allowed while state.susp */
617 break; 624 break;
618 625
619 case write_acked_by_peer_and_sis:
620 req->rq_state |= RQ_NET_SIS;
621 case conflict_discarded_by_peer: 626 case conflict_discarded_by_peer:
622 /* for discarded conflicting writes of multiple primaries, 627 /* for discarded conflicting writes of multiple primaries,
623 * there is no need to keep anything in the tl, potential 628 * there is no need to keep anything in the tl, potential
@@ -628,18 +633,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
628 (unsigned long long)req->sector, req->size); 633 (unsigned long long)req->sector, req->size);
629 req->rq_state |= RQ_NET_DONE; 634 req->rq_state |= RQ_NET_DONE;
630 /* fall through */ 635 /* fall through */
636 case write_acked_by_peer_and_sis:
631 case write_acked_by_peer: 637 case write_acked_by_peer:
638 if (what == write_acked_by_peer_and_sis)
639 req->rq_state |= RQ_NET_SIS;
632 /* protocol C; successfully written on peer. 640 /* protocol C; successfully written on peer.
633 * Nothing to do here. 641 * Nothing more to do here.
634 * We want to keep the tl in place for all protocols, to cater 642 * We want to keep the tl in place for all protocols, to cater
635 * for volatile write-back caches on lower level devices. 643 * for volatile write-back caches on lower level devices. */
636 *
637 * A barrier request is expected to have forced all prior
638 * requests onto stable storage, so completion of a barrier
639 * request could set NET_DONE right here, and not wait for the
640 * P_BARRIER_ACK, but that is an unnecessary optimization. */
641 644
642 /* this makes it effectively the same as for: */
643 case recv_acked_by_peer: 645 case recv_acked_by_peer:
644 /* protocol B; pretends to be successfully written on peer. 646 /* protocol B; pretends to be successfully written on peer.
645 * see also notes above in handed_over_to_network about 647 * see also notes above in handed_over_to_network about
@@ -773,6 +775,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
773 int local, remote, send_oos = 0; 775 int local, remote, send_oos = 0;
774 int err = -EIO; 776 int err = -EIO;
775 int ret = 0; 777 int ret = 0;
778 union drbd_state s;
776 779
777 /* allocate outside of all locks; */ 780 /* allocate outside of all locks; */
778 req = drbd_req_new(mdev, bio); 781 req = drbd_req_new(mdev, bio);
@@ -834,8 +837,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
834 drbd_al_begin_io(mdev, sector); 837 drbd_al_begin_io(mdev, sector);
835 } 838 }
836 839
837 remote = remote && drbd_should_do_remote(mdev->state); 840 s = mdev->state;
838 send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); 841 remote = remote && drbd_should_do_remote(s);
842 send_oos = rw == WRITE && drbd_should_send_oos(s);
839 D_ASSERT(!(remote && send_oos)); 843 D_ASSERT(!(remote && send_oos));
840 844
841 if (!(local || remote) && !is_susp(mdev->state)) { 845 if (!(local || remote) && !is_susp(mdev->state)) {
@@ -867,7 +871,7 @@ allocate_barrier:
867 871
868 if (is_susp(mdev->state)) { 872 if (is_susp(mdev->state)) {
869 /* If we got suspended, use the retry mechanism of 873 /* If we got suspended, use the retry mechanism of
870 generic_make_request() to restart processing of this 874 drbd_make_request() to restart processing of this
871 bio. In the next call to drbd_make_request 875 bio. In the next call to drbd_make_request
872 we sleep in inc_ap_bio() */ 876 we sleep in inc_ap_bio() */
873 ret = 1; 877 ret = 1;
@@ -1091,7 +1095,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
1091 */ 1095 */
1092 D_ASSERT(bio->bi_size > 0); 1096 D_ASSERT(bio->bi_size > 0);
1093 D_ASSERT((bio->bi_size & 0x1ff) == 0); 1097 D_ASSERT((bio->bi_size & 0x1ff) == 0);
1094 D_ASSERT(bio->bi_idx == 0);
1095 1098
1096 /* to make some things easier, force alignment of requests within the 1099 /* to make some things easier, force alignment of requests within the
1097 * granularity of our hash tables */ 1100 * granularity of our hash tables */
@@ -1099,8 +1102,9 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
1099 e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; 1102 e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
1100 1103
1101 if (likely(s_enr == e_enr)) { 1104 if (likely(s_enr == e_enr)) {
1102 inc_ap_bio(mdev, 1); 1105 do {
1103 drbd_make_request_common(mdev, bio, start_time); 1106 inc_ap_bio(mdev, 1);
1107 } while (drbd_make_request_common(mdev, bio, start_time));
1104 return; 1108 return;
1105 } 1109 }
1106 1110
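
With the do/while loop above, a request that hits the suspended state is retried by the same drbd_make_request() call instead of being pushed back through generic_make_request(). A userspace sketch of that retry shape (names are illustrative, and it is assumed that a failed attempt drops the reference it took, as the inc_ap_bio() comment suggests):

#include <stdio.h>

static int ap_bio_refs;
static int suspended_attempts = 2;      /* pretend I/O is suspended twice */

static void inc_ap_bio(void) { ap_bio_refs++; }
static void dec_ap_bio(void) { ap_bio_refs--; }

/* returns 1 to ask the caller to retry, 0 once the request is queued */
static int make_request_common(void)
{
        if (suspended_attempts > 0) {
                suspended_attempts--;
                dec_ap_bio();           /* drop the ref taken for this attempt */
                return 1;
        }
        printf("request queued, ap_bio_refs=%d\n", ap_bio_refs);
        return 0;
}

int main(void)
{
        do {
                inc_ap_bio();
        } while (make_request_common());
        return 0;
}
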
@@ -1196,36 +1200,66 @@ void request_timer_fn(unsigned long data)
1196 struct drbd_conf *mdev = (struct drbd_conf *) data; 1200 struct drbd_conf *mdev = (struct drbd_conf *) data;
1197 struct drbd_request *req; /* oldest request */ 1201 struct drbd_request *req; /* oldest request */
1198 struct list_head *le; 1202 struct list_head *le;
1199 unsigned long et = 0; /* effective timeout = ko_count * timeout */ 1203 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
1204 unsigned long now;
1200 1205
1201 if (get_net_conf(mdev)) { 1206 if (get_net_conf(mdev)) {
1202 et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; 1207 if (mdev->state.conn >= C_WF_REPORT_PARAMS)
1208 ent = mdev->net_conf->timeout*HZ/10
1209 * mdev->net_conf->ko_count;
1203 put_net_conf(mdev); 1210 put_net_conf(mdev);
1204 } 1211 }
1205 if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) 1212 if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
1213 dt = mdev->ldev->dc.disk_timeout * HZ / 10;
1214 put_ldev(mdev);
1215 }
1216 et = min_not_zero(dt, ent);
1217
1218 if (!et)
1206 return; /* Recurring timer stopped */ 1219 return; /* Recurring timer stopped */
1207 1220
1221 now = jiffies;
1222
1208 spin_lock_irq(&mdev->req_lock); 1223 spin_lock_irq(&mdev->req_lock);
1209 le = &mdev->oldest_tle->requests; 1224 le = &mdev->oldest_tle->requests;
1210 if (list_empty(le)) { 1225 if (list_empty(le)) {
1211 spin_unlock_irq(&mdev->req_lock); 1226 spin_unlock_irq(&mdev->req_lock);
1212 mod_timer(&mdev->request_timer, jiffies + et); 1227 mod_timer(&mdev->request_timer, now + et);
1213 return; 1228 return;
1214 } 1229 }
1215 1230
1216 le = le->prev; 1231 le = le->prev;
1217 req = list_entry(le, struct drbd_request, tl_requests); 1232 req = list_entry(le, struct drbd_request, tl_requests);
1218 if (time_is_before_eq_jiffies(req->start_time + et)) {
1219 if (req->rq_state & RQ_NET_PENDING) {
1220 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
1221 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
1222 } else {
1223 dev_warn(DEV, "Local backing block device frozen?\n");
1224 mod_timer(&mdev->request_timer, jiffies + et);
1225 }
1226 } else {
1227 mod_timer(&mdev->request_timer, req->start_time + et);
1228 }
1229 1233
 1234 /* The request is considered timed out if
 1235 * - we have some effective timeout from the configuration,
 1236 * with the above state restrictions applied,
 1237 * - the oldest request is waiting for a response from the network
 1238 * or from the local disk, respectively,
 1239 * - the oldest request is in fact older than the effective timeout,
 1240 * - the connection was established (or the disk was attached)
 1241 * for longer than the timeout already.
 1242 * Note that for 32bit jiffies and very stable connections/disks,
 1243 * we may have a wrap-around, which is caught by
 1244 * !time_in_range(now, last_..._jif, last_..._jif + timeout).
1245 *
1246 * Side effect: once per 32bit wrap-around interval, which means every
1247 * ~198 days with 250 HZ, we have a window where the timeout would need
1248 * to expire twice (worst case) to become effective. Good enough.
1249 */
1250 if (ent && req->rq_state & RQ_NET_PENDING &&
1251 time_after(now, req->start_time + ent) &&
1252 !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
1253 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
1254 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
1255 }
1256 if (dt && req->rq_state & RQ_LOCAL_PENDING &&
1257 time_after(now, req->start_time + dt) &&
1258 !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
1259 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
1260 __drbd_chk_io_error(mdev, 1);
1261 }
1262 nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
1230 spin_unlock_irq(&mdev->req_lock); 1263 spin_unlock_irq(&mdev->req_lock);
1264 mod_timer(&mdev->request_timer, nt);
1231} 1265}
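
The rewritten request_timer_fn() above derives one effective timeout from two independent sources: the network timeout (ko_count * timeout) and the disk timeout. A standalone model of that arithmetic (min_not_zero(), HZ and the jiffies comparison are re-implemented here for illustration; the values are made up):

#include <stdio.h>

#define HZ 250UL

/* smaller of two timeouts, ignoring ones that are not configured (zero) */
static unsigned long min_not_zero(unsigned long a, unsigned long b)
{
        if (!a)
                return b;
        if (!b)
                return a;
        return a < b ? a : b;
}

/* wrap-safe "is a after b", like the kernel's time_after() */
static int time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

int main(void)
{
        unsigned long ent = 0;          /* no network timeout configured */
        unsigned long dt = 6 * HZ;      /* disk-timeout: 6 seconds */
        unsigned long et = min_not_zero(dt, ent);

        unsigned long now = 100000UL;
        unsigned long start_time = now - 7 * HZ;   /* request is 7 s old */

        printf("et=%lu jiffies, timed out: %d\n",
               et, et && time_after(now, start_time + et));
        return 0;
}
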
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 68a234a5fdc5..3d2111919486 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -105,6 +105,7 @@ enum drbd_req_event {
105 read_completed_with_error, 105 read_completed_with_error,
106 read_ahead_completed_with_error, 106 read_ahead_completed_with_error,
107 write_completed_with_error, 107 write_completed_with_error,
108 abort_disk_io,
108 completed_ok, 109 completed_ok,
109 resend, 110 resend,
110 fail_frozen_disk_io, 111 fail_frozen_disk_io,
@@ -118,18 +119,21 @@ enum drbd_req_event {
118 * same time, so we should hold the request lock anyways. 119 * same time, so we should hold the request lock anyways.
119 */ 120 */
120enum drbd_req_state_bits { 121enum drbd_req_state_bits {
121 /* 210 122 /* 3210
122 * 000: no local possible 123 * 0000: no local possible
123 * 001: to be submitted 124 * 0001: to be submitted
124 * UNUSED, we could map: 011: submitted, completion still pending 125 * UNUSED, we could map: 011: submitted, completion still pending
125 * 110: completed ok 126 * 0110: completed ok
126 * 010: completed with error 127 * 0010: completed with error
128 * 1001: Aborted (before completion)
129 * 1x10: Aborted and completed -> free
127 */ 130 */
128 __RQ_LOCAL_PENDING, 131 __RQ_LOCAL_PENDING,
129 __RQ_LOCAL_COMPLETED, 132 __RQ_LOCAL_COMPLETED,
130 __RQ_LOCAL_OK, 133 __RQ_LOCAL_OK,
134 __RQ_LOCAL_ABORTED,
131 135
132 /* 76543 136 /* 87654
133 * 00000: no network possible 137 * 00000: no network possible
134 * 00001: to be send 138 * 00001: to be send
135 * 00011: to be send, on worker queue 139 * 00011: to be send, on worker queue
@@ -199,8 +203,9 @@ enum drbd_req_state_bits {
199#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) 203#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
200#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) 204#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
201#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) 205#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK)
206#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED)
202 207
203#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ 208#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1)
204 209
205#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) 210#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING)
206#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) 211#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED)
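
Since the new __RQ_LOCAL_ABORTED bit extends the local group, RQ_LOCAL_MASK is now derived from the highest local bit. A quick standalone check of that mask arithmetic (illustrative only):

#include <stdio.h>

enum {
        __RQ_LOCAL_PENDING,
        __RQ_LOCAL_COMPLETED,
        __RQ_LOCAL_OK,
        __RQ_LOCAL_ABORTED,     /* new highest bit of the local group */
};

#define RQ_LOCAL_PENDING        (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_ABORTED        (1UL << __RQ_LOCAL_ABORTED)
#define RQ_LOCAL_MASK           ((RQ_LOCAL_ABORTED << 1) - 1)

int main(void)
{
        /* covers all four local bits: 0x0f instead of the old 0x07 */
        printf("RQ_LOCAL_MASK = 0x%02lx\n", RQ_LOCAL_MASK);
        return 0;
}
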
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 4d3e6f6213ba..620c70ff2231 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -70,11 +70,29 @@ rwlock_t global_state_lock;
70void drbd_md_io_complete(struct bio *bio, int error) 70void drbd_md_io_complete(struct bio *bio, int error)
71{ 71{
72 struct drbd_md_io *md_io; 72 struct drbd_md_io *md_io;
73 struct drbd_conf *mdev;
73 74
74 md_io = (struct drbd_md_io *)bio->bi_private; 75 md_io = (struct drbd_md_io *)bio->bi_private;
76 mdev = container_of(md_io, struct drbd_conf, md_io);
77
75 md_io->error = error; 78 md_io->error = error;
76 79
77 complete(&md_io->event); 80 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
81 * to timeout on the lower level device, and eventually detach from it.
82 * If this io completion runs after that timeout expired, this
83 * drbd_md_put_buffer() may allow us to finally try and re-attach.
84 * During normal operation, this only puts that extra reference
85 * down to 1 again.
86 * Make sure we first drop the reference, and only then signal
87 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
88 * next drbd_md_sync_page_io(), that we trigger the
89 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
90 */
91 drbd_md_put_buffer(mdev);
92 md_io->done = 1;
93 wake_up(&mdev->misc_wait);
94 bio_put(bio);
95 put_ldev(mdev);
78} 96}
79 97
80/* reads on behalf of the partner, 98/* reads on behalf of the partner,
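
The comment added to drbd_md_io_complete() above argues for a specific ordering: release the extra buffer reference first, then publish completion. A single-threaded userspace sketch of that ordering (field names are stand-ins, not the drbd structures):

#include <stdio.h>

struct md_io_demo {
        int buffer_refs;        /* stands in for the md_io_in_use count */
        int done;
        int error;
};

static void md_put_buffer(struct md_io_demo *m)
{
        m->buffer_refs--;
}

static void md_io_complete(struct md_io_demo *m, int error)
{
        m->error = error;
        md_put_buffer(m);       /* drop the extra reference first ...      */
        m->done = 1;            /* ... and only then signal completion, so */
                                /* a woken waiter already sees the ref     */
                                /* gone and may safely reuse the buffer    */
}

int main(void)
{
        struct md_io_demo m = { .buffer_refs = 2 };  /* submitter + completion */

        md_io_complete(&m, 0);
        printf("refs=%d done=%d\n", m.buffer_refs, m.done);  /* refs=1 done=1 */
        return 0;
}
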
@@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error)
226 spin_lock_irqsave(&mdev->req_lock, flags); 244 spin_lock_irqsave(&mdev->req_lock, flags);
227 __req_mod(req, what, &m); 245 __req_mod(req, what, &m);
228 spin_unlock_irqrestore(&mdev->req_lock, flags); 246 spin_unlock_irqrestore(&mdev->req_lock, flags);
247 put_ldev(mdev);
229 248
230 if (m.bio) 249 if (m.bio)
231 complete_master_bio(mdev, &m); 250 complete_master_bio(mdev, &m);
@@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
290 sg_init_table(&sg, 1); 309 sg_init_table(&sg, 1);
291 crypto_hash_init(&desc); 310 crypto_hash_init(&desc);
292 311
293 __bio_for_each_segment(bvec, bio, i, 0) { 312 bio_for_each_segment(bvec, bio, i) {
294 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); 313 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
295 crypto_hash_update(&desc, &sg, sg.length); 314 crypto_hash_update(&desc, &sg, sg.length);
296 } 315 }
@@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
728 } 747 }
729 748
730 drbd_start_resync(mdev, C_SYNC_SOURCE); 749 drbd_start_resync(mdev, C_SYNC_SOURCE);
731 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); 750 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
732 return 1; 751 return 1;
733} 752}
734 753
@@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1519 } 1538 }
1520 1539
1521 drbd_state_lock(mdev); 1540 drbd_state_lock(mdev);
1522 1541 write_lock_irq(&global_state_lock);
1523 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { 1542 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
1543 write_unlock_irq(&global_state_lock);
1524 drbd_state_unlock(mdev); 1544 drbd_state_unlock(mdev);
1525 return; 1545 return;
1526 } 1546 }
1527 1547
1528 write_lock_irq(&global_state_lock); 1548 ns.i = mdev->state.i;
1529 ns = mdev->state;
1530 1549
1531 ns.aftr_isp = !_drbd_may_sync_now(mdev); 1550 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1532 1551
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 744f078f4dd8..cce7df367b79 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -202,7 +202,6 @@ static int slow_floppy;
202 202
203#include <asm/dma.h> 203#include <asm/dma.h>
204#include <asm/irq.h> 204#include <asm/irq.h>
205#include <asm/system.h>
206 205
207static int FLOPPY_IRQ = 6; 206static int FLOPPY_IRQ = 6;
208static int FLOPPY_DMA = 2; 207static int FLOPPY_DMA = 2;
@@ -552,7 +551,7 @@ static void floppy_ready(void);
552static void floppy_start(void); 551static void floppy_start(void);
553static void process_fd_request(void); 552static void process_fd_request(void);
554static void recalibrate_floppy(void); 553static void recalibrate_floppy(void);
555static void floppy_shutdown(unsigned long); 554static void floppy_shutdown(struct work_struct *);
556 555
557static int floppy_request_regions(int); 556static int floppy_request_regions(int);
558static void floppy_release_regions(int); 557static void floppy_release_regions(int);
@@ -589,6 +588,8 @@ static int buffer_max = -1;
589static struct floppy_fdc_state fdc_state[N_FDC]; 588static struct floppy_fdc_state fdc_state[N_FDC];
590static int fdc; /* current fdc */ 589static int fdc; /* current fdc */
591 590
591static struct workqueue_struct *floppy_wq;
592
592static struct floppy_struct *_floppy = floppy_type; 593static struct floppy_struct *_floppy = floppy_type;
593static unsigned char current_drive; 594static unsigned char current_drive;
594static long current_count_sectors; 595static long current_count_sectors;
@@ -630,16 +631,15 @@ static inline void set_debugt(void) { }
630static inline void debugt(const char *func, const char *msg) { } 631static inline void debugt(const char *func, const char *msg) { }
631#endif /* DEBUGT */ 632#endif /* DEBUGT */
632 633
633typedef void (*timeout_fn)(unsigned long);
634static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0);
635 634
635static DECLARE_DELAYED_WORK(fd_timeout, floppy_shutdown);
636static const char *timeout_message; 636static const char *timeout_message;
637 637
638static void is_alive(const char *func, const char *message) 638static void is_alive(const char *func, const char *message)
639{ 639{
640 /* this routine checks whether the floppy driver is "alive" */ 640 /* this routine checks whether the floppy driver is "alive" */
641 if (test_bit(0, &fdc_busy) && command_status < 2 && 641 if (test_bit(0, &fdc_busy) && command_status < 2 &&
642 !timer_pending(&fd_timeout)) { 642 !delayed_work_pending(&fd_timeout)) {
643 DPRINT("%s: timeout handler died. %s\n", func, message); 643 DPRINT("%s: timeout handler died. %s\n", func, message);
644 } 644 }
645} 645}
@@ -667,15 +667,18 @@ static int output_log_pos;
667 667
668static void __reschedule_timeout(int drive, const char *message) 668static void __reschedule_timeout(int drive, const char *message)
669{ 669{
670 unsigned long delay;
671
670 if (drive == current_reqD) 672 if (drive == current_reqD)
671 drive = current_drive; 673 drive = current_drive;
672 del_timer(&fd_timeout); 674
673 if (drive < 0 || drive >= N_DRIVE) { 675 if (drive < 0 || drive >= N_DRIVE) {
674 fd_timeout.expires = jiffies + 20UL * HZ; 676 delay = 20UL * HZ;
675 drive = 0; 677 drive = 0;
676 } else 678 } else
677 fd_timeout.expires = jiffies + UDP->timeout; 679 delay = UDP->timeout;
678 add_timer(&fd_timeout); 680
681 queue_delayed_work(floppy_wq, &fd_timeout, delay);
679 if (UDP->flags & FD_DEBUG) 682 if (UDP->flags & FD_DEBUG)
680 DPRINT("reschedule timeout %s\n", message); 683 DPRINT("reschedule timeout %s\n", message);
681 timeout_message = message; 684 timeout_message = message;
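
The timer-to-delayed-work conversion above also changes how deadlines are expressed: timers were armed with an absolute expiry (jiffies + delay), while queue_delayed_work() takes a relative delay. A small standalone illustration of that conversion (HZ and the concrete values are assumptions for the demo):

#include <stdio.h>

#define HZ 250UL

static unsigned long jiffies = 1000000UL;       /* pretend current time */

/* stand-in for queue_delayed_work(): only reports the relative delay */
static void queue_delayed_work_demo(const char *what, unsigned long delay)
{
        printf("%s: run in %lu jiffies (%.1f s)\n",
               what, delay, (double)delay / HZ);
}

int main(void)
{
        unsigned long timeout = 20UL * HZ;              /* was: expires = jiffies + 20*HZ */
        unsigned long ready_date = jiffies + HZ / 2;    /* an absolute deadline */

        queue_delayed_work_demo("fd_timeout", timeout);
        queue_delayed_work_demo("fd_timer", ready_date - jiffies);
        return 0;
}
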
@@ -873,7 +876,7 @@ static int lock_fdc(int drive, bool interruptible)
873 876
874 command_status = FD_COMMAND_NONE; 877 command_status = FD_COMMAND_NONE;
875 878
876 __reschedule_timeout(drive, "lock fdc"); 879 reschedule_timeout(drive, "lock fdc");
877 set_fdc(drive); 880 set_fdc(drive);
878 return 0; 881 return 0;
879} 882}
@@ -881,23 +884,15 @@ static int lock_fdc(int drive, bool interruptible)
881/* unlocks the driver */ 884/* unlocks the driver */
882static void unlock_fdc(void) 885static void unlock_fdc(void)
883{ 886{
884 unsigned long flags;
885
886 raw_cmd = NULL;
887 if (!test_bit(0, &fdc_busy)) 887 if (!test_bit(0, &fdc_busy))
888 DPRINT("FDC access conflict!\n"); 888 DPRINT("FDC access conflict!\n");
889 889
890 if (do_floppy) 890 raw_cmd = NULL;
891 DPRINT("device interrupt still active at FDC release: %pf!\n",
892 do_floppy);
893 command_status = FD_COMMAND_NONE; 891 command_status = FD_COMMAND_NONE;
894 spin_lock_irqsave(&floppy_lock, flags); 892 __cancel_delayed_work(&fd_timeout);
895 del_timer(&fd_timeout); 893 do_floppy = NULL;
896 cont = NULL; 894 cont = NULL;
897 clear_bit(0, &fdc_busy); 895 clear_bit(0, &fdc_busy);
898 if (current_req || set_next_request())
899 do_fd_request(current_req->q);
900 spin_unlock_irqrestore(&floppy_lock, flags);
901 wake_up(&fdc_wait); 896 wake_up(&fdc_wait);
902} 897}
903 898
@@ -969,26 +964,24 @@ static DECLARE_WORK(floppy_work, NULL);
969 964
970static void schedule_bh(void (*handler)(void)) 965static void schedule_bh(void (*handler)(void))
971{ 966{
967 WARN_ON(work_pending(&floppy_work));
968
972 PREPARE_WORK(&floppy_work, (work_func_t)handler); 969 PREPARE_WORK(&floppy_work, (work_func_t)handler);
973 schedule_work(&floppy_work); 970 queue_work(floppy_wq, &floppy_work);
974} 971}
975 972
976static DEFINE_TIMER(fd_timer, NULL, 0, 0); 973static DECLARE_DELAYED_WORK(fd_timer, NULL);
977 974
978static void cancel_activity(void) 975static void cancel_activity(void)
979{ 976{
980 unsigned long flags;
981
982 spin_lock_irqsave(&floppy_lock, flags);
983 do_floppy = NULL; 977 do_floppy = NULL;
984 PREPARE_WORK(&floppy_work, (work_func_t)empty); 978 cancel_delayed_work_sync(&fd_timer);
985 del_timer(&fd_timer); 979 cancel_work_sync(&floppy_work);
986 spin_unlock_irqrestore(&floppy_lock, flags);
987} 980}
988 981
989/* this function makes sure that the disk stays in the drive during the 982/* this function makes sure that the disk stays in the drive during the
990 * transfer */ 983 * transfer */
991static void fd_watchdog(void) 984static void fd_watchdog(struct work_struct *arg)
992{ 985{
993 debug_dcl(DP->flags, "calling disk change from watchdog\n"); 986 debug_dcl(DP->flags, "calling disk change from watchdog\n");
994 987
@@ -998,21 +991,20 @@ static void fd_watchdog(void)
998 cont->done(0); 991 cont->done(0);
999 reset_fdc(); 992 reset_fdc();
1000 } else { 993 } else {
1001 del_timer(&fd_timer); 994 cancel_delayed_work(&fd_timer);
1002 fd_timer.function = (timeout_fn)fd_watchdog; 995 PREPARE_DELAYED_WORK(&fd_timer, fd_watchdog);
1003 fd_timer.expires = jiffies + HZ / 10; 996 queue_delayed_work(floppy_wq, &fd_timer, HZ / 10);
1004 add_timer(&fd_timer);
1005 } 997 }
1006} 998}
1007 999
1008static void main_command_interrupt(void) 1000static void main_command_interrupt(void)
1009{ 1001{
1010 del_timer(&fd_timer); 1002 cancel_delayed_work(&fd_timer);
1011 cont->interrupt(); 1003 cont->interrupt();
1012} 1004}
1013 1005
1014/* waits for a delay (spinup or select) to pass */ 1006/* waits for a delay (spinup or select) to pass */
1015static int fd_wait_for_completion(unsigned long delay, timeout_fn function) 1007static int fd_wait_for_completion(unsigned long expires, work_func_t function)
1016{ 1008{
1017 if (FDCS->reset) { 1009 if (FDCS->reset) {
1018 reset_fdc(); /* do the reset during sleep to win time 1010 reset_fdc(); /* do the reset during sleep to win time
@@ -1021,47 +1013,15 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
1021 return 1; 1013 return 1;
1022 } 1014 }
1023 1015
1024 if (time_before(jiffies, delay)) { 1016 if (time_before(jiffies, expires)) {
1025 del_timer(&fd_timer); 1017 cancel_delayed_work(&fd_timer);
1026 fd_timer.function = function; 1018 PREPARE_DELAYED_WORK(&fd_timer, function);
1027 fd_timer.expires = delay; 1019 queue_delayed_work(floppy_wq, &fd_timer, expires - jiffies);
1028 add_timer(&fd_timer);
1029 return 1; 1020 return 1;
1030 } 1021 }
1031 return 0; 1022 return 0;
1032} 1023}
1033 1024
1034static DEFINE_SPINLOCK(floppy_hlt_lock);
1035static int hlt_disabled;
1036static void floppy_disable_hlt(void)
1037{
1038 unsigned long flags;
1039
1040 WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012");
1041 spin_lock_irqsave(&floppy_hlt_lock, flags);
1042 if (!hlt_disabled) {
1043 hlt_disabled = 1;
1044#ifdef HAVE_DISABLE_HLT
1045 disable_hlt();
1046#endif
1047 }
1048 spin_unlock_irqrestore(&floppy_hlt_lock, flags);
1049}
1050
1051static void floppy_enable_hlt(void)
1052{
1053 unsigned long flags;
1054
1055 spin_lock_irqsave(&floppy_hlt_lock, flags);
1056 if (hlt_disabled) {
1057 hlt_disabled = 0;
1058#ifdef HAVE_DISABLE_HLT
1059 enable_hlt();
1060#endif
1061 }
1062 spin_unlock_irqrestore(&floppy_hlt_lock, flags);
1063}
1064
1065static void setup_DMA(void) 1025static void setup_DMA(void)
1066{ 1026{
1067 unsigned long f; 1027 unsigned long f;
@@ -1106,7 +1066,6 @@ static void setup_DMA(void)
1106 fd_enable_dma(); 1066 fd_enable_dma();
1107 release_dma_lock(f); 1067 release_dma_lock(f);
1108#endif 1068#endif
1109 floppy_disable_hlt();
1110} 1069}
1111 1070
1112static void show_floppy(void); 1071static void show_floppy(void);
@@ -1375,7 +1334,7 @@ static int fdc_dtr(void)
1375 */ 1334 */
1376 FDCS->dtr = raw_cmd->rate & 3; 1335 FDCS->dtr = raw_cmd->rate & 3;
1377 return fd_wait_for_completion(jiffies + 2UL * HZ / 100, 1336 return fd_wait_for_completion(jiffies + 2UL * HZ / 100,
1378 (timeout_fn)floppy_ready); 1337 (work_func_t)floppy_ready);
1379} /* fdc_dtr */ 1338} /* fdc_dtr */
1380 1339
1381static void tell_sector(void) 1340static void tell_sector(void)
@@ -1480,7 +1439,7 @@ static void setup_rw_floppy(void)
1480 int flags; 1439 int flags;
1481 int dflags; 1440 int dflags;
1482 unsigned long ready_date; 1441 unsigned long ready_date;
1483 timeout_fn function; 1442 work_func_t function;
1484 1443
1485 flags = raw_cmd->flags; 1444 flags = raw_cmd->flags;
1486 if (flags & (FD_RAW_READ | FD_RAW_WRITE)) 1445 if (flags & (FD_RAW_READ | FD_RAW_WRITE))
@@ -1494,9 +1453,9 @@ static void setup_rw_floppy(void)
1494 */ 1453 */
1495 if (time_after(ready_date, jiffies + DP->select_delay)) { 1454 if (time_after(ready_date, jiffies + DP->select_delay)) {
1496 ready_date -= DP->select_delay; 1455 ready_date -= DP->select_delay;
1497 function = (timeout_fn)floppy_start; 1456 function = (work_func_t)floppy_start;
1498 } else 1457 } else
1499 function = (timeout_fn)setup_rw_floppy; 1458 function = (work_func_t)setup_rw_floppy;
1500 1459
1501 /* wait until the floppy is spinning fast enough */ 1460 /* wait until the floppy is spinning fast enough */
1502 if (fd_wait_for_completion(ready_date, function)) 1461 if (fd_wait_for_completion(ready_date, function))
@@ -1526,7 +1485,7 @@ static void setup_rw_floppy(void)
1526 inr = result(); 1485 inr = result();
1527 cont->interrupt(); 1486 cont->interrupt();
1528 } else if (flags & FD_RAW_NEED_DISK) 1487 } else if (flags & FD_RAW_NEED_DISK)
1529 fd_watchdog(); 1488 fd_watchdog(NULL);
1530} 1489}
1531 1490
1532static int blind_seek; 1491static int blind_seek;
@@ -1708,7 +1667,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
1708 fd_disable_dma(); 1667 fd_disable_dma();
1709 release_dma_lock(f); 1668 release_dma_lock(f);
1710 1669
1711 floppy_enable_hlt();
1712 do_floppy = NULL; 1670 do_floppy = NULL;
1713 if (fdc >= N_FDC || FDCS->address == -1) { 1671 if (fdc >= N_FDC || FDCS->address == -1) {
1714 /* we don't even know which FDC is the culprit */ 1672 /* we don't even know which FDC is the culprit */
@@ -1836,20 +1794,22 @@ static void show_floppy(void)
1836 pr_info("do_floppy=%pf\n", do_floppy); 1794 pr_info("do_floppy=%pf\n", do_floppy);
1837 if (work_pending(&floppy_work)) 1795 if (work_pending(&floppy_work))
1838 pr_info("floppy_work.func=%pf\n", floppy_work.func); 1796 pr_info("floppy_work.func=%pf\n", floppy_work.func);
1839 if (timer_pending(&fd_timer)) 1797 if (delayed_work_pending(&fd_timer))
1840 pr_info("fd_timer.function=%pf\n", fd_timer.function); 1798 pr_info("delayed work.function=%p expires=%ld\n",
1841 if (timer_pending(&fd_timeout)) { 1799 fd_timer.work.func,
1842 pr_info("timer_function=%pf\n", fd_timeout.function); 1800 fd_timer.timer.expires - jiffies);
1843 pr_info("expires=%lu\n", fd_timeout.expires - jiffies); 1801 if (delayed_work_pending(&fd_timeout))
1844 pr_info("now=%lu\n", jiffies); 1802 pr_info("timer_function=%p expires=%ld\n",
1845 } 1803 fd_timeout.work.func,
1804 fd_timeout.timer.expires - jiffies);
1805
1846 pr_info("cont=%p\n", cont); 1806 pr_info("cont=%p\n", cont);
1847 pr_info("current_req=%p\n", current_req); 1807 pr_info("current_req=%p\n", current_req);
1848 pr_info("command_status=%d\n", command_status); 1808 pr_info("command_status=%d\n", command_status);
1849 pr_info("\n"); 1809 pr_info("\n");
1850} 1810}
1851 1811
1852static void floppy_shutdown(unsigned long data) 1812static void floppy_shutdown(struct work_struct *arg)
1853{ 1813{
1854 unsigned long flags; 1814 unsigned long flags;
1855 1815
@@ -1857,8 +1817,6 @@ static void floppy_shutdown(unsigned long data)
1857 show_floppy(); 1817 show_floppy();
1858 cancel_activity(); 1818 cancel_activity();
1859 1819
1860 floppy_enable_hlt();
1861
1862 flags = claim_dma_lock(); 1820 flags = claim_dma_lock();
1863 fd_disable_dma(); 1821 fd_disable_dma();
1864 release_dma_lock(flags); 1822 release_dma_lock(flags);
@@ -1904,7 +1862,7 @@ static int start_motor(void (*function)(void))
1904 1862
1905 /* wait_for_completion also schedules reset if needed. */ 1863 /* wait_for_completion also schedules reset if needed. */
1906 return fd_wait_for_completion(DRS->select_date + DP->select_delay, 1864 return fd_wait_for_completion(DRS->select_date + DP->select_delay,
1907 (timeout_fn)function); 1865 (work_func_t)function);
1908} 1866}
1909 1867
1910static void floppy_ready(void) 1868static void floppy_ready(void)
@@ -2857,7 +2815,6 @@ do_request:
2857 spin_lock_irq(&floppy_lock); 2815 spin_lock_irq(&floppy_lock);
2858 pending = set_next_request(); 2816 pending = set_next_request();
2859 spin_unlock_irq(&floppy_lock); 2817 spin_unlock_irq(&floppy_lock);
2860
2861 if (!pending) { 2818 if (!pending) {
2862 do_floppy = NULL; 2819 do_floppy = NULL;
2863 unlock_fdc(); 2820 unlock_fdc();
@@ -2934,13 +2891,15 @@ static void do_fd_request(struct request_queue *q)
2934 current_req->cmd_flags)) 2891 current_req->cmd_flags))
2935 return; 2892 return;
2936 2893
2937 if (test_bit(0, &fdc_busy)) { 2894 if (test_and_set_bit(0, &fdc_busy)) {
2938 /* fdc busy, this new request will be treated when the 2895 /* fdc busy, this new request will be treated when the
2939 current one is done */ 2896 current one is done */
2940 is_alive(__func__, "old request running"); 2897 is_alive(__func__, "old request running");
2941 return; 2898 return;
2942 } 2899 }
2943 lock_fdc(MAXTIMEOUT, false); 2900 command_status = FD_COMMAND_NONE;
2901 __reschedule_timeout(MAXTIMEOUT, "fd_request");
2902 set_fdc(0);
2944 process_fd_request(); 2903 process_fd_request();
2945 is_alive(__func__, ""); 2904 is_alive(__func__, "");
2946} 2905}
@@ -3648,9 +3607,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
3648 3607
3649 mutex_lock(&floppy_mutex); 3608 mutex_lock(&floppy_mutex);
3650 mutex_lock(&open_lock); 3609 mutex_lock(&open_lock);
3651 if (UDRS->fd_ref < 0) 3610 if (!UDRS->fd_ref--) {
3652 UDRS->fd_ref = 0;
3653 else if (!UDRS->fd_ref--) {
3654 DPRINT("floppy_release with fd_ref == 0"); 3611 DPRINT("floppy_release with fd_ref == 0");
3655 UDRS->fd_ref = 0; 3612 UDRS->fd_ref = 0;
3656 } 3613 }
@@ -3686,13 +3643,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
3686 set_bit(FD_VERIFY_BIT, &UDRS->flags); 3643 set_bit(FD_VERIFY_BIT, &UDRS->flags);
3687 } 3644 }
3688 3645
3689 if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (mode & FMODE_EXCL))) 3646 UDRS->fd_ref++;
3690 goto out2;
3691
3692 if (mode & FMODE_EXCL)
3693 UDRS->fd_ref = -1;
3694 else
3695 UDRS->fd_ref++;
3696 3647
3697 opened_bdev[drive] = bdev; 3648 opened_bdev[drive] = bdev;
3698 3649
@@ -3755,10 +3706,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
3755 mutex_unlock(&floppy_mutex); 3706 mutex_unlock(&floppy_mutex);
3756 return 0; 3707 return 0;
3757out: 3708out:
3758 if (UDRS->fd_ref < 0) 3709 UDRS->fd_ref--;
3759 UDRS->fd_ref = 0; 3710
3760 else
3761 UDRS->fd_ref--;
3762 if (!UDRS->fd_ref) 3711 if (!UDRS->fd_ref)
3763 opened_bdev[drive] = NULL; 3712 opened_bdev[drive] = NULL;
3764out2: 3713out2:
@@ -4195,10 +4144,16 @@ static int __init floppy_init(void)
4195 goto out_put_disk; 4144 goto out_put_disk;
4196 } 4145 }
4197 4146
4147 floppy_wq = alloc_ordered_workqueue("floppy", 0);
4148 if (!floppy_wq) {
4149 err = -ENOMEM;
4150 goto out_put_disk;
4151 }
4152
4198 disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock); 4153 disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
4199 if (!disks[dr]->queue) { 4154 if (!disks[dr]->queue) {
4200 err = -ENOMEM; 4155 err = -ENOMEM;
4201 goto out_put_disk; 4156 goto out_destroy_workq;
4202 } 4157 }
4203 4158
4204 blk_queue_max_hw_sectors(disks[dr]->queue, 64); 4159 blk_queue_max_hw_sectors(disks[dr]->queue, 64);
@@ -4249,7 +4204,7 @@ static int __init floppy_init(void)
4249 use_virtual_dma = can_use_virtual_dma & 1; 4204 use_virtual_dma = can_use_virtual_dma & 1;
4250 fdc_state[0].address = FDC1; 4205 fdc_state[0].address = FDC1;
4251 if (fdc_state[0].address == -1) { 4206 if (fdc_state[0].address == -1) {
4252 del_timer_sync(&fd_timeout); 4207 cancel_delayed_work(&fd_timeout);
4253 err = -ENODEV; 4208 err = -ENODEV;
4254 goto out_unreg_region; 4209 goto out_unreg_region;
4255 } 4210 }
@@ -4260,7 +4215,7 @@ static int __init floppy_init(void)
4260 fdc = 0; /* reset fdc in case of unexpected interrupt */ 4215 fdc = 0; /* reset fdc in case of unexpected interrupt */
4261 err = floppy_grab_irq_and_dma(); 4216 err = floppy_grab_irq_and_dma();
4262 if (err) { 4217 if (err) {
4263 del_timer_sync(&fd_timeout); 4218 cancel_delayed_work(&fd_timeout);
4264 err = -EBUSY; 4219 err = -EBUSY;
4265 goto out_unreg_region; 4220 goto out_unreg_region;
4266 } 4221 }
@@ -4317,13 +4272,13 @@ static int __init floppy_init(void)
4317 user_reset_fdc(-1, FD_RESET_ALWAYS, false); 4272 user_reset_fdc(-1, FD_RESET_ALWAYS, false);
4318 } 4273 }
4319 fdc = 0; 4274 fdc = 0;
4320 del_timer_sync(&fd_timeout); 4275 cancel_delayed_work(&fd_timeout);
4321 current_drive = 0; 4276 current_drive = 0;
4322 initialized = true; 4277 initialized = true;
4323 if (have_no_fdc) { 4278 if (have_no_fdc) {
4324 DPRINT("no floppy controllers found\n"); 4279 DPRINT("no floppy controllers found\n");
4325 err = have_no_fdc; 4280 err = have_no_fdc;
4326 goto out_flush_work; 4281 goto out_release_dma;
4327 } 4282 }
4328 4283
4329 for (drive = 0; drive < N_DRIVE; drive++) { 4284 for (drive = 0; drive < N_DRIVE; drive++) {
@@ -4338,7 +4293,7 @@ static int __init floppy_init(void)
4338 4293
4339 err = platform_device_register(&floppy_device[drive]); 4294 err = platform_device_register(&floppy_device[drive]);
4340 if (err) 4295 if (err)
4341 goto out_flush_work; 4296 goto out_release_dma;
4342 4297
4343 err = device_create_file(&floppy_device[drive].dev, 4298 err = device_create_file(&floppy_device[drive].dev,
4344 &dev_attr_cmos); 4299 &dev_attr_cmos);
@@ -4356,13 +4311,14 @@ static int __init floppy_init(void)
4356 4311
4357out_unreg_platform_dev: 4312out_unreg_platform_dev:
4358 platform_device_unregister(&floppy_device[drive]); 4313 platform_device_unregister(&floppy_device[drive]);
4359out_flush_work: 4314out_release_dma:
4360 flush_work_sync(&floppy_work);
4361 if (atomic_read(&usage_count)) 4315 if (atomic_read(&usage_count))
4362 floppy_release_irq_and_dma(); 4316 floppy_release_irq_and_dma();
4363out_unreg_region: 4317out_unreg_region:
4364 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 4318 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
4365 platform_driver_unregister(&floppy_driver); 4319 platform_driver_unregister(&floppy_driver);
4320out_destroy_workq:
4321 destroy_workqueue(floppy_wq);
4366out_unreg_blkdev: 4322out_unreg_blkdev:
4367 unregister_blkdev(FLOPPY_MAJOR, "fd"); 4323 unregister_blkdev(FLOPPY_MAJOR, "fd");
4368out_put_disk: 4324out_put_disk:
@@ -4433,7 +4389,7 @@ static int floppy_grab_irq_and_dma(void)
4433 * We might have scheduled a free_irq(), wait it to 4389 * We might have scheduled a free_irq(), wait it to
4434 * drain first: 4390 * drain first:
4435 */ 4391 */
4436 flush_work_sync(&floppy_work); 4392 flush_workqueue(floppy_wq);
4437 4393
4438 if (fd_request_irq()) { 4394 if (fd_request_irq()) {
4439 DPRINT("Unable to grab IRQ%d for the floppy driver\n", 4395 DPRINT("Unable to grab IRQ%d for the floppy driver\n",
@@ -4509,7 +4465,6 @@ static void floppy_release_irq_and_dma(void)
4509#if N_FDC > 1 4465#if N_FDC > 1
4510 set_dor(1, ~8, 0); 4466 set_dor(1, ~8, 0);
4511#endif 4467#endif
4512 floppy_enable_hlt();
4513 4468
4514 if (floppy_track_buffer && max_buffer_sectors) { 4469 if (floppy_track_buffer && max_buffer_sectors) {
4515 tmpsize = max_buffer_sectors * 1024; 4470 tmpsize = max_buffer_sectors * 1024;
@@ -4525,9 +4480,9 @@ static void floppy_release_irq_and_dma(void)
4525 pr_info("motor off timer %d still active\n", drive); 4480 pr_info("motor off timer %d still active\n", drive);
4526#endif 4481#endif
4527 4482
4528 if (timer_pending(&fd_timeout)) 4483 if (delayed_work_pending(&fd_timeout))
4529 pr_info("floppy timer still active:%s\n", timeout_message); 4484 pr_info("floppy timer still active:%s\n", timeout_message);
4530 if (timer_pending(&fd_timer)) 4485 if (delayed_work_pending(&fd_timer))
4531 pr_info("auxiliary floppy timer still active\n"); 4486 pr_info("auxiliary floppy timer still active\n");
4532 if (work_pending(&floppy_work)) 4487 if (work_pending(&floppy_work))
4533 pr_info("work still pending\n"); 4488 pr_info("work still pending\n");
@@ -4597,8 +4552,9 @@ static void __exit floppy_module_exit(void)
4597 put_disk(disks[drive]); 4552 put_disk(disks[drive]);
4598 } 4553 }
4599 4554
4600 del_timer_sync(&fd_timeout); 4555 cancel_delayed_work_sync(&fd_timeout);
4601 del_timer_sync(&fd_timer); 4556 cancel_delayed_work_sync(&fd_timer);
4557 destroy_workqueue(floppy_wq);
4602 4558
4603 if (atomic_read(&usage_count)) 4559 if (atomic_read(&usage_count))
4604 floppy_release_irq_and_dma(); 4560 floppy_release_irq_and_dma();
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index b52c9ca146fc..bf397bf108b7 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -44,7 +44,6 @@
44#define HD_IRQ 14 44#define HD_IRQ 14
45 45
46#define REALLY_SLOW_IO 46#define REALLY_SLOW_IO
47#include <asm/system.h>
48#include <asm/io.h> 47#include <asm/io.h>
49#include <asm/uaccess.h> 48#include <asm/uaccess.h>
50 49
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cd504353b278..bbca966f8f66 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd,
93 struct page *loop_page, unsigned loop_off, 93 struct page *loop_page, unsigned loop_off,
94 int size, sector_t real_block) 94 int size, sector_t real_block)
95{ 95{
96 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 96 char *raw_buf = kmap_atomic(raw_page) + raw_off;
97 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 97 char *loop_buf = kmap_atomic(loop_page) + loop_off;
98 98
99 if (cmd == READ) 99 if (cmd == READ)
100 memcpy(loop_buf, raw_buf, size); 100 memcpy(loop_buf, raw_buf, size);
101 else 101 else
102 memcpy(raw_buf, loop_buf, size); 102 memcpy(raw_buf, loop_buf, size);
103 103
104 kunmap_atomic(loop_buf, KM_USER1); 104 kunmap_atomic(loop_buf);
105 kunmap_atomic(raw_buf, KM_USER0); 105 kunmap_atomic(raw_buf);
106 cond_resched(); 106 cond_resched();
107 return 0; 107 return 0;
108} 108}
@@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
112 struct page *loop_page, unsigned loop_off, 112 struct page *loop_page, unsigned loop_off,
113 int size, sector_t real_block) 113 int size, sector_t real_block)
114{ 114{
115 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 115 char *raw_buf = kmap_atomic(raw_page) + raw_off;
116 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 116 char *loop_buf = kmap_atomic(loop_page) + loop_off;
117 char *in, *out, *key; 117 char *in, *out, *key;
118 int i, keysize; 118 int i, keysize;
119 119
@@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
130 for (i = 0; i < size; i++) 130 for (i = 0; i < size; i++)
131 *out++ = *in++ ^ key[(i & 511) % keysize]; 131 *out++ = *in++ ^ key[(i & 511) % keysize];
132 132
133 kunmap_atomic(loop_buf, KM_USER1); 133 kunmap_atomic(loop_buf);
134 kunmap_atomic(raw_buf, KM_USER0); 134 kunmap_atomic(raw_buf);
135 cond_resched(); 135 cond_resched();
136 return 0; 136 return 0;
137} 137}
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
index b5dd14e072f2..0ba837fc62a8 100644
--- a/drivers/block/mtip32xx/Kconfig
+++ b/drivers/block/mtip32xx/Kconfig
@@ -4,6 +4,6 @@
4 4
5config BLK_DEV_PCIESSD_MTIP32XX 5config BLK_DEV_PCIESSD_MTIP32XX
6 tristate "Block Device Driver for Micron PCIe SSDs" 6 tristate "Block Device Driver for Micron PCIe SSDs"
7 depends on HOTPLUG_PCI_PCIE 7 depends on PCI
8 help 8 help
9 This enables the block driver for Micron PCIe SSDs. 9 This enables the block driver for Micron PCIe SSDs.
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 8eb81c96608f..264bc77dcb91 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -36,6 +36,7 @@
36#include <linux/idr.h> 36#include <linux/idr.h>
37#include <linux/kthread.h> 37#include <linux/kthread.h>
38#include <../drivers/ata/ahci.h> 38#include <../drivers/ata/ahci.h>
39#include <linux/export.h>
39#include "mtip32xx.h" 40#include "mtip32xx.h"
40 41
41#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) 42#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
@@ -44,6 +45,7 @@
44#define HW_PORT_PRIV_DMA_SZ \ 45#define HW_PORT_PRIV_DMA_SZ \
45 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) 46 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
46 47
48#define HOST_CAP_NZDMA (1 << 19)
47#define HOST_HSORG 0xFC 49#define HOST_HSORG 0xFC
48#define HSORG_DISABLE_SLOTGRP_INTR (1<<24) 50#define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
49#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16) 51#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
@@ -139,6 +141,12 @@ static void mtip_command_cleanup(struct driver_data *dd)
139 int group = 0, commandslot = 0, commandindex = 0; 141 int group = 0, commandslot = 0, commandindex = 0;
140 struct mtip_cmd *command; 142 struct mtip_cmd *command;
141 struct mtip_port *port = dd->port; 143 struct mtip_port *port = dd->port;
144 static int in_progress;
145
146 if (in_progress)
147 return;
148
149 in_progress = 1;
142 150
143 for (group = 0; group < 4; group++) { 151 for (group = 0; group < 4; group++) {
144 for (commandslot = 0; commandslot < 32; commandslot++) { 152 for (commandslot = 0; commandslot < 32; commandslot++) {
@@ -165,7 +173,8 @@ static void mtip_command_cleanup(struct driver_data *dd)
165 173
166 up(&port->cmd_slot); 174 up(&port->cmd_slot);
167 175
168 atomic_set(&dd->drv_cleanup_done, true); 176 set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
177 in_progress = 0;
169} 178}
170 179
171/* 180/*
@@ -262,6 +271,9 @@ static int hba_reset_nosleep(struct driver_data *dd)
262 && time_before(jiffies, timeout)) 271 && time_before(jiffies, timeout))
263 mdelay(1); 272 mdelay(1);
264 273
274 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
275 return -1;
276
265 if (readl(dd->mmio + HOST_CTL) & HOST_RESET) 277 if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
266 return -1; 278 return -1;
267 279
@@ -282,18 +294,20 @@ static int hba_reset_nosleep(struct driver_data *dd)
282 */ 294 */
283static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) 295static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
284{ 296{
285 unsigned long flags = 0;
286
287 atomic_set(&port->commands[tag].active, 1); 297 atomic_set(&port->commands[tag].active, 1);
288 298
289 spin_lock_irqsave(&port->cmd_issue_lock, flags); 299 spin_lock(&port->cmd_issue_lock);
290 300
291 writel((1 << MTIP_TAG_BIT(tag)), 301 writel((1 << MTIP_TAG_BIT(tag)),
292 port->s_active[MTIP_TAG_INDEX(tag)]); 302 port->s_active[MTIP_TAG_INDEX(tag)]);
293 writel((1 << MTIP_TAG_BIT(tag)), 303 writel((1 << MTIP_TAG_BIT(tag)),
294 port->cmd_issue[MTIP_TAG_INDEX(tag)]); 304 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
295 305
296 spin_unlock_irqrestore(&port->cmd_issue_lock, flags); 306 spin_unlock(&port->cmd_issue_lock);
307
308 /* Set the command's timeout value.*/
309 port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
310 MTIP_NCQ_COMMAND_TIMEOUT_MS);
297} 311}
298 312
299/* 313/*
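
Besides switching to a plain spin_lock(), the hunk above stamps a completion deadline on the command at issue time; the periodic timeout handler later compares against it. A standalone model of that deadline arithmetic (the 15000 ms value and the helper names are assumptions for the demo):

#include <stdio.h>

#define HZ 250UL
#define NCQ_COMMAND_TIMEOUT_MS_DEMO 15000UL

static unsigned long msecs_to_jiffies_demo(unsigned long ms)
{
        return ms * HZ / 1000UL;
}

/* wrap-safe "is a after b", like the kernel's time_after() */
static int time_after_demo(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

int main(void)
{
        unsigned long jiffies = 500000UL;
        unsigned long comp_time = jiffies +
                msecs_to_jiffies_demo(NCQ_COMMAND_TIMEOUT_MS_DEMO);

        jiffies += 16UL * HZ;   /* 16 s later, past the 15 s deadline */
        printf("timed out: %d\n", time_after_demo(jiffies, comp_time));
        return 0;
}
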
@@ -422,6 +436,10 @@ static void mtip_init_port(struct mtip_port *port)
422 /* Clear any pending interrupts for this port */ 436 /* Clear any pending interrupts for this port */
423 writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT); 437 writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
424 438
439 /* Clear any pending interrupts on the HBA. */
440 writel(readl(port->dd->mmio + HOST_IRQ_STAT),
441 port->dd->mmio + HOST_IRQ_STAT);
442
425 /* Enable port interrupts */ 443 /* Enable port interrupts */
426 writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK); 444 writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
427} 445}
@@ -447,6 +465,9 @@ static void mtip_restart_port(struct mtip_port *port)
447 && time_before(jiffies, timeout)) 465 && time_before(jiffies, timeout))
448 ; 466 ;
449 467
468 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
469 return;
470
450 /* 471 /*
451 * Chip quirk: escalate to hba reset if 472 * Chip quirk: escalate to hba reset if
452 * PxCMD.CR not clear after 500 ms 473 * PxCMD.CR not clear after 500 ms
@@ -475,6 +496,9 @@ static void mtip_restart_port(struct mtip_port *port)
475 while (time_before(jiffies, timeout)) 496 while (time_before(jiffies, timeout))
476 ; 497 ;
477 498
499 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
500 return;
501
478 /* Clear PxSCTL.DET */ 502 /* Clear PxSCTL.DET */
479 writel(readl(port->mmio + PORT_SCR_CTL) & ~1, 503 writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
480 port->mmio + PORT_SCR_CTL); 504 port->mmio + PORT_SCR_CTL);
@@ -486,15 +510,35 @@ static void mtip_restart_port(struct mtip_port *port)
486 && time_before(jiffies, timeout)) 510 && time_before(jiffies, timeout))
487 ; 511 ;
488 512
513 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
514 return;
515
489 if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) 516 if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
490 dev_warn(&port->dd->pdev->dev, 517 dev_warn(&port->dd->pdev->dev,
491 "COM reset failed\n"); 518 "COM reset failed\n");
492 519
493 /* Clear SError, the PxSERR.DIAG.x should be set so clear it */ 520 mtip_init_port(port);
494 writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); 521 mtip_start_port(port);
495 522
496 /* Enable the DMA engine */ 523}
497 mtip_enable_engine(port, 1); 524
525/*
526 * Helper function for tag logging
527 */
528static void print_tags(struct driver_data *dd,
529 char *msg,
530 unsigned long *tagbits,
531 int cnt)
532{
533 unsigned char tagmap[128];
534 int group, tagmap_len = 0;
535
536 memset(tagmap, 0, sizeof(tagmap));
537 for (group = SLOTBITS_IN_LONGS; group > 0; group--)
 538 tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ",
539 tagbits[group-1]);
540 dev_warn(&dd->pdev->dev,
541 "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
498} 542}
499 543
500/* 544/*
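
The tag-map formatting in print_tags() above relies on sprintf() returning the number of characters written, so the offset has to accumulate across slot groups or later groups overwrite earlier output. A standalone check of that pattern (the group count and bit patterns are made up):

#include <stdio.h>
#include <string.h>

#define SLOTBITS_IN_LONGS_DEMO 2

int main(void)
{
        unsigned long tagbits[SLOTBITS_IN_LONGS_DEMO] = { 0x5UL, 0x30UL };
        char tagmap[128];
        int group, tagmap_len = 0;

        memset(tagmap, 0, sizeof(tagmap));
        /* highest group first, appending 16 hex digits and a space each time */
        for (group = SLOTBITS_IN_LONGS_DEMO; group > 0; group--)
                tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ",
                                      tagbits[group - 1]);

        printf("2 command(s) timed out: tagmap [%s]\n", tagmap);
        return 0;
}
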
@@ -514,15 +558,18 @@ static void mtip_timeout_function(unsigned long int data)
514 int tag, cmdto_cnt = 0; 558 int tag, cmdto_cnt = 0;
515 unsigned int bit, group; 559 unsigned int bit, group;
516 unsigned int num_command_slots = port->dd->slot_groups * 32; 560 unsigned int num_command_slots = port->dd->slot_groups * 32;
561 unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
517 562
518 if (unlikely(!port)) 563 if (unlikely(!port))
519 return; 564 return;
520 565
521 if (atomic_read(&port->dd->resumeflag) == true) { 566 if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
522 mod_timer(&port->cmd_timer, 567 mod_timer(&port->cmd_timer,
523 jiffies + msecs_to_jiffies(30000)); 568 jiffies + msecs_to_jiffies(30000));
524 return; 569 return;
525 } 570 }
571 /* clear the tag accumulator */
572 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
526 573
527 for (tag = 0; tag < num_command_slots; tag++) { 574 for (tag = 0; tag < num_command_slots; tag++) {
528 /* 575 /*
@@ -540,12 +587,10 @@ static void mtip_timeout_function(unsigned long int data)
540 command = &port->commands[tag]; 587 command = &port->commands[tag];
541 fis = (struct host_to_dev_fis *) command->command; 588 fis = (struct host_to_dev_fis *) command->command;
542 589
543 dev_warn(&port->dd->pdev->dev, 590 set_bit(tag, tagaccum);
544 "Timeout for command tag %d\n", tag);
545
546 cmdto_cnt++; 591 cmdto_cnt++;
547 if (cmdto_cnt == 1) 592 if (cmdto_cnt == 1)
548 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); 593 set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
549 594
550 /* 595 /*
551 * Clear the completed bit. This should prevent 596 * Clear the completed bit. This should prevent
@@ -578,15 +623,29 @@ static void mtip_timeout_function(unsigned long int data)
578 } 623 }
579 } 624 }
580 625
581 if (cmdto_cnt) { 626 if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
582 dev_warn(&port->dd->pdev->dev, 627 print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
583 "%d commands timed out: restarting port", 628
584 cmdto_cnt);
585 mtip_restart_port(port); 629 mtip_restart_port(port);
586 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); 630 clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
587 wake_up_interruptible(&port->svc_wait); 631 wake_up_interruptible(&port->svc_wait);
588 } 632 }
589 633
634 if (port->ic_pause_timer) {
635 to = port->ic_pause_timer + msecs_to_jiffies(1000);
636 if (time_after(jiffies, to)) {
637 if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
638 port->ic_pause_timer = 0;
639 clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
640 clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
641 clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
642 wake_up_interruptible(&port->svc_wait);
643 }
644
645
646 }
647 }
648
590 /* Restart the timer */ 649 /* Restart the timer */
591 mod_timer(&port->cmd_timer, 650 mod_timer(&port->cmd_timer,
592 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); 651 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
@@ -681,23 +740,18 @@ static void mtip_completion(struct mtip_port *port,
681 complete(waiting); 740 complete(waiting);
682} 741}
683 742
684/* 743static void mtip_null_completion(struct mtip_port *port,
685 * Helper function for tag logging 744 int tag,
686 */ 745 void *data,
687static void print_tags(struct driver_data *dd, 746 int status)
688 char *msg,
689 unsigned long *tagbits)
690{ 747{
691 unsigned int tag, count = 0; 748 return;
692
693 for (tag = 0; tag < (dd->slot_groups) * 32; tag++) {
694 if (test_bit(tag, tagbits))
695 count++;
696 }
697 if (count)
698 dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count);
699} 749}
700 750
751static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
752 dma_addr_t buffer_dma, unsigned int sectors);
753static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
754 struct smart_attr *attrib);
701/* 755/*
702 * Handle an error. 756 * Handle an error.
703 * 757 *
@@ -708,12 +762,16 @@ static void print_tags(struct driver_data *dd,
708 */ 762 */
709static void mtip_handle_tfe(struct driver_data *dd) 763static void mtip_handle_tfe(struct driver_data *dd)
710{ 764{
711 int group, tag, bit, reissue; 765 int group, tag, bit, reissue, rv;
712 struct mtip_port *port; 766 struct mtip_port *port;
713 struct mtip_cmd *command; 767 struct mtip_cmd *cmd;
714 u32 completed; 768 u32 completed;
715 struct host_to_dev_fis *fis; 769 struct host_to_dev_fis *fis;
716 unsigned long tagaccum[SLOTBITS_IN_LONGS]; 770 unsigned long tagaccum[SLOTBITS_IN_LONGS];
771 unsigned int cmd_cnt = 0;
772 unsigned char *buf;
773 char *fail_reason = NULL;
774 int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
717 775
718 dev_warn(&dd->pdev->dev, "Taskfile error\n"); 776 dev_warn(&dd->pdev->dev, "Taskfile error\n");
719 777
@@ -721,9 +779,23 @@ static void mtip_handle_tfe(struct driver_data *dd)
721 779
722 /* Stop the timer to prevent command timeouts. */ 780 /* Stop the timer to prevent command timeouts. */
723 del_timer(&port->cmd_timer); 781 del_timer(&port->cmd_timer);
782 set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
724 783
725 /* Set eh_active */ 784 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
726 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); 785 test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
786 cmd = &port->commands[MTIP_TAG_INTERNAL];
787 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
788
789 atomic_inc(&cmd->active); /* active > 1 indicates error */
790 if (cmd->comp_data && cmd->comp_func) {
791 cmd->comp_func(port, MTIP_TAG_INTERNAL,
792 cmd->comp_data, PORT_IRQ_TF_ERR);
793 }
794 goto handle_tfe_exit;
795 }
796
797 /* clear the tag accumulator */
798 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
727 799
728 /* Loop through all the groups */ 800 /* Loop through all the groups */
729 for (group = 0; group < dd->slot_groups; group++) { 801 for (group = 0; group < dd->slot_groups; group++) {
@@ -732,9 +804,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
732 /* clear completed status register in the hardware.*/ 804 /* clear completed status register in the hardware.*/
733 writel(completed, port->completed[group]); 805 writel(completed, port->completed[group]);
734 806
735 /* clear the tag accumulator */
736 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
737
738 /* Process successfully completed commands */ 807 /* Process successfully completed commands */
739 for (bit = 0; bit < 32 && completed; bit++) { 808 for (bit = 0; bit < 32 && completed; bit++) {
740 if (!(completed & (1<<bit))) 809 if (!(completed & (1<<bit)))
@@ -745,13 +814,14 @@ static void mtip_handle_tfe(struct driver_data *dd)
745 if (tag == MTIP_TAG_INTERNAL) 814 if (tag == MTIP_TAG_INTERNAL)
746 continue; 815 continue;
747 816
748 command = &port->commands[tag]; 817 cmd = &port->commands[tag];
749 if (likely(command->comp_func)) { 818 if (likely(cmd->comp_func)) {
750 set_bit(tag, tagaccum); 819 set_bit(tag, tagaccum);
751 atomic_set(&port->commands[tag].active, 0); 820 cmd_cnt++;
752 command->comp_func(port, 821 atomic_set(&cmd->active, 0);
822 cmd->comp_func(port,
753 tag, 823 tag,
754 command->comp_data, 824 cmd->comp_data,
755 0); 825 0);
756 } else { 826 } else {
757 dev_err(&port->dd->pdev->dev, 827 dev_err(&port->dd->pdev->dev,
@@ -765,12 +835,45 @@ static void mtip_handle_tfe(struct driver_data *dd)
765 } 835 }
766 } 836 }
767 } 837 }
768 print_tags(dd, "TFE tags completed:", tagaccum); 838
839 print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
769 840
770 /* Restart the port */ 841 /* Restart the port */
771 mdelay(20); 842 mdelay(20);
772 mtip_restart_port(port); 843 mtip_restart_port(port);
773 844
845 /* Trying to determine the cause of the error */
846 rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
847 dd->port->log_buf,
848 dd->port->log_buf_dma, 1);
849 if (rv) {
850 dev_warn(&dd->pdev->dev,
851 "Error in READ LOG EXT (10h) command\n");
852 /* non-critical error, don't fail the load */
853 } else {
854 buf = (unsigned char *)dd->port->log_buf;
855 if (buf[259] & 0x1) {
856 dev_info(&dd->pdev->dev,
857 "Write protect bit is set.\n");
858 set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
859 fail_all_ncq_write = 1;
860 fail_reason = "write protect";
861 }
862 if (buf[288] == 0xF7) {
863 dev_info(&dd->pdev->dev,
864 "Exceeded Tmax, drive in thermal shutdown.\n");
865 set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
866 fail_all_ncq_cmds = 1;
867 fail_reason = "thermal shutdown";
868 }
869 if (buf[288] == 0xBF) {
870 dev_info(&dd->pdev->dev,
871 "Drive indicates rebuild has failed.\n");
872 fail_all_ncq_cmds = 1;
873 fail_reason = "rebuild failed";
874 }
875 }
876
774 /* clear the tag accumulator */ 877 /* clear the tag accumulator */
775 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); 878 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
776 879
@@ -779,32 +882,47 @@ static void mtip_handle_tfe(struct driver_data *dd)
779 for (bit = 0; bit < 32; bit++) { 882 for (bit = 0; bit < 32; bit++) {
780 reissue = 1; 883 reissue = 1;
781 tag = (group << 5) + bit; 884 tag = (group << 5) + bit;
885 cmd = &port->commands[tag];
782 886
783 /* If the active bit is set re-issue the command */ 887 /* If the active bit is set re-issue the command */
784 if (atomic_read(&port->commands[tag].active) == 0) 888 if (atomic_read(&cmd->active) == 0)
785 continue; 889 continue;
786 890
787 fis = (struct host_to_dev_fis *) 891 fis = (struct host_to_dev_fis *)cmd->command;
788 port->commands[tag].command;
789 892
790 /* Should re-issue? */ 893 /* Should re-issue? */
791 if (tag == MTIP_TAG_INTERNAL || 894 if (tag == MTIP_TAG_INTERNAL ||
792 fis->command == ATA_CMD_SET_FEATURES) 895 fis->command == ATA_CMD_SET_FEATURES)
793 reissue = 0; 896 reissue = 0;
897 else {
898 if (fail_all_ncq_cmds ||
899 (fail_all_ncq_write &&
900 fis->command == ATA_CMD_FPDMA_WRITE)) {
901 dev_warn(&dd->pdev->dev,
902 " Fail: %s w/tag %d [%s].\n",
903 fis->command == ATA_CMD_FPDMA_WRITE ?
904 "write" : "read",
905 tag,
906 fail_reason != NULL ?
907 fail_reason : "unknown");
908 atomic_set(&cmd->active, 0);
909 if (cmd->comp_func) {
910 cmd->comp_func(port, tag,
911 cmd->comp_data,
912 -ENODATA);
913 }
914 continue;
915 }
916 }
794 917
795 /* 918 /*
796 * First check if this command has 919 * First check if this command has
797 * exceeded its retries. 920 * exceeded its retries.
798 */ 921 */
799 if (reissue && 922 if (reissue && (cmd->retries-- > 0)) {
800 (port->commands[tag].retries-- > 0)) {
801 923
802 set_bit(tag, tagaccum); 924 set_bit(tag, tagaccum);
803 925
804 /* Update the timeout value. */
805 port->commands[tag].comp_time =
806 jiffies + msecs_to_jiffies(
807 MTIP_NCQ_COMMAND_TIMEOUT_MS);
808 /* Re-issue the command. */ 926 /* Re-issue the command. */
809 mtip_issue_ncq_command(port, tag); 927 mtip_issue_ncq_command(port, tag);
810 928
@@ -814,13 +932,13 @@ static void mtip_handle_tfe(struct driver_data *dd)
814 /* Retire a command that will not be reissued */ 932 /* Retire a command that will not be reissued */
815 dev_warn(&port->dd->pdev->dev, 933 dev_warn(&port->dd->pdev->dev,
816 "retiring tag %d\n", tag); 934 "retiring tag %d\n", tag);
817 atomic_set(&port->commands[tag].active, 0); 935 atomic_set(&cmd->active, 0);
818 936
819 if (port->commands[tag].comp_func) 937 if (cmd->comp_func)
820 port->commands[tag].comp_func( 938 cmd->comp_func(
821 port, 939 port,
822 tag, 940 tag,
823 port->commands[tag].comp_data, 941 cmd->comp_data,
824 PORT_IRQ_TF_ERR); 942 PORT_IRQ_TF_ERR);
825 else 943 else
826 dev_warn(&port->dd->pdev->dev, 944 dev_warn(&port->dd->pdev->dev,
@@ -828,10 +946,11 @@ static void mtip_handle_tfe(struct driver_data *dd)
828 tag); 946 tag);
829 } 947 }
830 } 948 }
831 print_tags(dd, "TFE tags reissued:", tagaccum); 949 print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
832 950
951handle_tfe_exit:
833 /* clear eh_active */ 952 /* clear eh_active */
834 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); 953 clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
835 wake_up_interruptible(&port->svc_wait); 954 wake_up_interruptible(&port->svc_wait);
836 955
837 mod_timer(&port->cmd_timer, 956 mod_timer(&port->cmd_timer,
@@ -851,6 +970,8 @@ static inline void mtip_process_sdbf(struct driver_data *dd)
851 /* walk all bits in all slot groups */ 970 /* walk all bits in all slot groups */
852 for (group = 0; group < dd->slot_groups; group++) { 971 for (group = 0; group < dd->slot_groups; group++) {
853 completed = readl(port->completed[group]); 972 completed = readl(port->completed[group]);
973 if (!completed)
974 continue;
854 975
855 /* clear completed status register in the hardware.*/ 976 /* clear completed status register in the hardware.*/
856 writel(completed, port->completed[group]); 977 writel(completed, port->completed[group]);
@@ -899,7 +1020,7 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
899 struct mtip_port *port = dd->port; 1020 struct mtip_port *port = dd->port;
900 struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; 1021 struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
901 1022
902 if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && 1023 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
903 (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) 1024 (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
904 & (1 << MTIP_TAG_INTERNAL))) { 1025 & (1 << MTIP_TAG_INTERNAL))) {
905 if (cmd->comp_func) { 1026 if (cmd->comp_func) {
@@ -911,8 +1032,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
911 } 1032 }
912 } 1033 }
913 1034
914 dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat);
915
916 return; 1035 return;
917} 1036}
918 1037
@@ -968,6 +1087,9 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
968 /* don't proceed further */ 1087 /* don't proceed further */
969 return IRQ_HANDLED; 1088 return IRQ_HANDLED;
970 } 1089 }
1090 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1091 &dd->dd_flag))
1092 return rv;
971 1093
972 mtip_process_errors(dd, port_stat & PORT_IRQ_ERR); 1094 mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
973 } 1095 }
@@ -1015,6 +1137,39 @@ static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1015 port->cmd_issue[MTIP_TAG_INDEX(tag)]); 1137 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1016} 1138}
1017 1139
1140static bool mtip_pause_ncq(struct mtip_port *port,
1141 struct host_to_dev_fis *fis)
1142{
1143 struct host_to_dev_fis *reply;
1144 unsigned long task_file_data;
1145
1146 reply = port->rxfis + RX_FIS_D2H_REG;
1147 task_file_data = readl(port->mmio+PORT_TFDATA);
1148
1149 if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT))
1150 return false;
1151
1152 if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
1153 set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1154 port->ic_pause_timer = jiffies;
1155 return true;
1156 } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
1157 (fis->features == 0x03)) {
1158 set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1159 port->ic_pause_timer = jiffies;
1160 return true;
1161 } else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
1162 ((fis->command == 0xFC) &&
1163 (fis->features == 0x27 || fis->features == 0x72 ||
1164 fis->features == 0x62 || fis->features == 0x26))) {
1165 /* Com reset after secure erase or lowlevel format */
1166 mtip_restart_port(port);
1167 return false;
1168 }
1169
1170 return false;
1171}
1172
1018/* 1173/*
1019 * Wait for port to quiesce 1174 * Wait for port to quiesce
1020 * 1175 *
@@ -1033,11 +1188,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1033 1188
1034 to = jiffies + msecs_to_jiffies(timeout); 1189 to = jiffies + msecs_to_jiffies(timeout);
1035 do { 1190 do {
1036 if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) && 1191 if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
1037 test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { 1192 test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
1038 msleep(20); 1193 msleep(20);
1039 continue; /* svc thd is actively issuing commands */ 1194 continue; /* svc thd is actively issuing commands */
1040 } 1195 }
1196 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1197 return -EFAULT;
1041 /* 1198 /*
1042 * Ignore s_active bit 0 of array element 0. 1199 * Ignore s_active bit 0 of array element 0.
1043 * This bit will always be set 1200 * This bit will always be set
@@ -1074,7 +1231,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1074 * -EAGAIN Time out waiting for command to complete. 1231 * -EAGAIN Time out waiting for command to complete.
1075 */ 1232 */
1076static int mtip_exec_internal_command(struct mtip_port *port, 1233static int mtip_exec_internal_command(struct mtip_port *port,
1077 void *fis, 1234 struct host_to_dev_fis *fis,
1078 int fis_len, 1235 int fis_len,
1079 dma_addr_t buffer, 1236 dma_addr_t buffer,
1080 int buf_len, 1237 int buf_len,
@@ -1084,8 +1241,9 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1084{ 1241{
1085 struct mtip_cmd_sg *command_sg; 1242 struct mtip_cmd_sg *command_sg;
1086 DECLARE_COMPLETION_ONSTACK(wait); 1243 DECLARE_COMPLETION_ONSTACK(wait);
1087 int rv = 0; 1244 int rv = 0, ready2go = 1;
1088 struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; 1245 struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1246 unsigned long to;
1089 1247
1090 /* Make sure the buffer is 8 byte aligned. This is asic specific. */ 1248 /* Make sure the buffer is 8 byte aligned. This is asic specific. */
1091 if (buffer & 0x00000007) { 1249 if (buffer & 0x00000007) {
@@ -1094,23 +1252,38 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1094 return -EFAULT; 1252 return -EFAULT;
1095 } 1253 }
1096 1254
1097 /* Only one internal command should be running at a time */ 1255 to = jiffies + msecs_to_jiffies(timeout);
1098 if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) { 1256 do {
1257 ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
1258 port->allocated);
1259 if (ready2go)
1260 break;
1261 mdelay(100);
1262 } while (time_before(jiffies, to));
1263 if (!ready2go) {
1099 dev_warn(&port->dd->pdev->dev, 1264 dev_warn(&port->dd->pdev->dev,
1100 "Internal command already active\n"); 1265 "Internal cmd active. new cmd [%02X]\n", fis->command);
1101 return -EBUSY; 1266 return -EBUSY;
1102 } 1267 }
1103 set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); 1268 set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1269 port->ic_pause_timer = 0;
1270
1271 if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
1272 clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1273 else if (fis->command == ATA_CMD_DOWNLOAD_MICRO)
1274 clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1104 1275
1105 if (atomic == GFP_KERNEL) { 1276 if (atomic == GFP_KERNEL) {
1106 /* wait for io to complete if non atomic */ 1277 if (fis->command != ATA_CMD_STANDBYNOW1) {
1107 if (mtip_quiesce_io(port, 5000) < 0) { 1278 /* wait for io to complete if non atomic */
1108 dev_warn(&port->dd->pdev->dev, 1279 if (mtip_quiesce_io(port, 5000) < 0) {
1109 "Failed to quiesce IO\n"); 1280 dev_warn(&port->dd->pdev->dev,
1110 release_slot(port, MTIP_TAG_INTERNAL); 1281 "Failed to quiesce IO\n");
1111 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); 1282 release_slot(port, MTIP_TAG_INTERNAL);
1112 wake_up_interruptible(&port->svc_wait); 1283 clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1113 return -EBUSY; 1284 wake_up_interruptible(&port->svc_wait);
1285 return -EBUSY;
1286 }
1114 } 1287 }
1115 1288
1116 /* Set the completion function and data for the command. */ 1289 /* Set the completion function and data for the command. */
@@ -1120,7 +1293,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1120 } else { 1293 } else {
1121 /* Clear completion - we're going to poll */ 1294 /* Clear completion - we're going to poll */
1122 int_cmd->comp_data = NULL; 1295 int_cmd->comp_data = NULL;
1123 int_cmd->comp_func = NULL; 1296 int_cmd->comp_func = mtip_null_completion;
1124 } 1297 }
1125 1298
1126 /* Copy the command to the command table */ 1299 /* Copy the command to the command table */
@@ -1159,38 +1332,60 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1159 "Internal command did not complete [%d] " 1332 "Internal command did not complete [%d] "
1160 "within timeout of %lu ms\n", 1333 "within timeout of %lu ms\n",
1161 atomic, timeout); 1334 atomic, timeout);
1335 if (mtip_check_surprise_removal(port->dd->pdev) ||
1336 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1337 &port->dd->dd_flag)) {
1338 rv = -ENXIO;
1339 goto exec_ic_exit;
1340 }
1162 rv = -EAGAIN; 1341 rv = -EAGAIN;
1163 } 1342 }
1164
1165 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1166 & (1 << MTIP_TAG_INTERNAL)) {
1167 dev_warn(&port->dd->pdev->dev,
1168 "Retiring internal command but CI is 1.\n");
1169 }
1170
1171 } else { 1343 } else {
1172 /* Spin for <timeout> checking if command still outstanding */ 1344 /* Spin for <timeout> checking if command still outstanding */
1173 timeout = jiffies + msecs_to_jiffies(timeout); 1345 timeout = jiffies + msecs_to_jiffies(timeout);
1346 while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1347 & (1 << MTIP_TAG_INTERNAL))
1348 && time_before(jiffies, timeout)) {
1349 if (mtip_check_surprise_removal(port->dd->pdev)) {
1350 rv = -ENXIO;
1351 goto exec_ic_exit;
1352 }
1353 if ((fis->command != ATA_CMD_STANDBYNOW1) &&
1354 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1355 &port->dd->dd_flag)) {
1356 rv = -ENXIO;
1357 goto exec_ic_exit;
1358 }
1359 if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) {
1360 atomic_inc(&int_cmd->active); /* error */
1361 break;
1362 }
1363 }
1364 }
1174 1365
1175 while ((readl( 1366 if (atomic_read(&int_cmd->active) > 1) {
1176 port->cmd_issue[MTIP_TAG_INTERNAL]) 1367 dev_err(&port->dd->pdev->dev,
1177 & (1 << MTIP_TAG_INTERNAL)) 1368 "Internal command [%02X] failed\n", fis->command);
1178 && time_before(jiffies, timeout)) 1369 rv = -EIO;
1179 ; 1370 }
1180 1371 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1181 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1182 & (1 << MTIP_TAG_INTERNAL)) { 1372 & (1 << MTIP_TAG_INTERNAL)) {
1183 dev_err(&port->dd->pdev->dev, 1373 rv = -ENXIO;
1184 "Internal command did not complete [%d]\n", 1374 if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1185 atomic); 1375 &port->dd->dd_flag)) {
1376 mtip_restart_port(port);
1186 rv = -EAGAIN; 1377 rv = -EAGAIN;
1187 } 1378 }
1188 } 1379 }
1189 1380exec_ic_exit:
1190 /* Clear the allocated and active bits for the internal command. */ 1381 /* Clear the allocated and active bits for the internal command. */
1191 atomic_set(&int_cmd->active, 0); 1382 atomic_set(&int_cmd->active, 0);
1192 release_slot(port, MTIP_TAG_INTERNAL); 1383 release_slot(port, MTIP_TAG_INTERNAL);
1193 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); 1384 if (rv >= 0 && mtip_pause_ncq(port, fis)) {
1385 /* NCQ paused */
1386 return rv;
1387 }
1388 clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1194 wake_up_interruptible(&port->svc_wait); 1389 wake_up_interruptible(&port->svc_wait);
1195 1390
1196 return rv; 1391 return rv;
@@ -1240,6 +1435,9 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1240 int rv = 0; 1435 int rv = 0;
1241 struct host_to_dev_fis fis; 1436 struct host_to_dev_fis fis;
1242 1437
1438 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1439 return -EFAULT;
1440
1243 /* Build the FIS. */ 1441 /* Build the FIS. */
1244 memset(&fis, 0, sizeof(struct host_to_dev_fis)); 1442 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1245 fis.type = 0x27; 1443 fis.type = 0x27;
@@ -1313,6 +1511,7 @@ static int mtip_standby_immediate(struct mtip_port *port)
1313{ 1511{
1314 int rv; 1512 int rv;
1315 struct host_to_dev_fis fis; 1513 struct host_to_dev_fis fis;
1514 unsigned long start;
1316 1515
1317 /* Build the FIS. */ 1516 /* Build the FIS. */
1318 memset(&fis, 0, sizeof(struct host_to_dev_fis)); 1517 memset(&fis, 0, sizeof(struct host_to_dev_fis));
@@ -1320,15 +1519,150 @@ static int mtip_standby_immediate(struct mtip_port *port)
1320 fis.opts = 1 << 7; 1519 fis.opts = 1 << 7;
1321 fis.command = ATA_CMD_STANDBYNOW1; 1520 fis.command = ATA_CMD_STANDBYNOW1;
1322 1521
1323 /* Execute the command. Use a 15-second timeout for large drives. */ 1522 start = jiffies;
1324 rv = mtip_exec_internal_command(port, 1523 rv = mtip_exec_internal_command(port,
1325 &fis, 1524 &fis,
1326 5, 1525 5,
1327 0, 1526 0,
1328 0, 1527 0,
1329 0, 1528 0,
1330 GFP_KERNEL, 1529 GFP_ATOMIC,
1331 15000); 1530 15000);
1531 dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
1532 jiffies_to_msecs(jiffies - start));
1533 if (rv)
1534 dev_warn(&port->dd->pdev->dev,
1535 "STANDBY IMMEDIATE command failed.\n");
1536
1537 return rv;
1538}
1539
1540/*
1541 * Issue a READ LOG EXT command to the device.
1542 *
1543 * @port pointer to the port structure.
1544 * @page page number to fetch
1545 * @buffer pointer to buffer
1546 * @buffer_dma dma address corresponding to @buffer
1547 * @sectors page length to fetch, in sectors
1548 *
1549 * return value
1550 * @rv return value from mtip_exec_internal_command()
1551 */
1552static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
1553 dma_addr_t buffer_dma, unsigned int sectors)
1554{
1555 struct host_to_dev_fis fis;
1556
1557 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1558 fis.type = 0x27;
1559 fis.opts = 1 << 7;
1560 fis.command = ATA_CMD_READ_LOG_EXT;
1561 fis.sect_count = sectors & 0xFF;
1562 fis.sect_cnt_ex = (sectors >> 8) & 0xFF;
1563 fis.lba_low = page;
1564 fis.lba_mid = 0;
1565 fis.device = ATA_DEVICE_OBS;
1566
1567 memset(buffer, 0, sectors * ATA_SECT_SIZE);
1568
1569 return mtip_exec_internal_command(port,
1570 &fis,
1571 5,
1572 buffer_dma,
1573 sectors * ATA_SECT_SIZE,
1574 0,
1575 GFP_ATOMIC,
1576 MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
1577}
1578
1579/*
1580 * Issue a SMART READ DATA command to the device.
1581 *
1582 * @port pointer to the port structure.
1583 * @buffer pointer to buffer
1584 * @buffer_dma dma address corresponding to @buffer
1585 *
1586 * return value
1587 * @rv return value from mtip_exec_internal_command()
1588 */
1589static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
1590 dma_addr_t buffer_dma)
1591{
1592 struct host_to_dev_fis fis;
1593
1594 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1595 fis.type = 0x27;
1596 fis.opts = 1 << 7;
1597 fis.command = ATA_CMD_SMART;
1598 fis.features = 0xD0;
1599 fis.sect_count = 1;
1600 fis.lba_mid = 0x4F;
1601 fis.lba_hi = 0xC2;
1602 fis.device = ATA_DEVICE_OBS;
1603
1604 return mtip_exec_internal_command(port,
1605 &fis,
1606 5,
1607 buffer_dma,
1608 ATA_SECT_SIZE,
1609 0,
1610 GFP_ATOMIC,
1611 15000);
1612}
1613
1614/*
1615 * Get the value of a smart attribute
1616 *
1617 * @port pointer to the port structure
1618 * @id attribute number
1619 * @attrib pointer to return attrib information corresponding to @id
1620 *
1621 * return value
1622 * -EINVAL NULL buffer passed or unsupported attribute @id.
1623 * -EPERM Identify data not valid, SMART not supported or not enabled
1624 */
1625static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
1626 struct smart_attr *attrib)
1627{
1628 int rv, i;
1629 struct smart_attr *pattr;
1630
1631 if (!attrib)
1632 return -EINVAL;
1633
1634 if (!port->identify_valid) {
1635 dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
1636 return -EPERM;
1637 }
1638 if (!(port->identify[82] & 0x1)) {
1639 dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
1640 return -EPERM;
1641 }
1642 if (!(port->identify[85] & 0x1)) {
1643 dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
1644 return -EPERM;
1645 }
1646
1647 memset(port->smart_buf, 0, ATA_SECT_SIZE);
1648 rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
1649 if (rv) {
 1650 		dev_warn(&port->dd->pdev->dev, "Failed to get SMART data\n");
1651 return rv;
1652 }
1653
1654 pattr = (struct smart_attr *)(port->smart_buf + 2);
1655 for (i = 0; i < 29; i++, pattr++)
1656 if (pattr->attr_id == id) {
1657 memcpy(attrib, pattr, sizeof(struct smart_attr));
1658 break;
1659 }
1660
1661 if (i == 29) {
1662 dev_warn(&port->dd->pdev->dev,
1663 "Query for invalid SMART attribute ID\n");
1664 rv = -EINVAL;
1665 }
1332 1666
1333 return rv; 1667 return rv;
1334} 1668}
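For reference, a standalone sketch (not part of the patch) of the lookup mtip_get_smart_attr() performs on the 512-byte SMART READ DATA sector: attribute entries start at byte offset 2 and 29 fixed-size slots are scanned for a matching attribute ID. The packed entry layout below is an assumption made for illustration (it mirrors what struct smart_attr in mtip32xx.h appears to be), and the sample sector contents are made up.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Assumed layout of one 12-byte SMART attribute entry. */
struct smart_attr {
	uint8_t  attr_id;
	uint16_t flags;
	uint8_t  cur;
	uint8_t  worst;
	uint32_t data;
	uint8_t  res[3];
} __attribute__((packed));

int main(void)
{
	uint8_t sector[512] = { 0 };
	struct smart_attr sample = { .attr_id = 242, .cur = 87, .data = 1234 };
	struct smart_attr *pattr;
	int i;

	/* Fake a SMART data sector with attribute 242 in the first slot. */
	memcpy(sector + 2, &sample, sizeof(sample));

	/* Scan the 29 attribute slots, as mtip_get_smart_attr() does. */
	pattr = (struct smart_attr *)(sector + 2);
	for (i = 0; i < 29; i++, pattr++)
		if (pattr->attr_id == 242) {
			printf("attr 242: cur=%u data=%u\n",
			       pattr->cur, (unsigned int)pattr->data);
			break;
		}
	if (i == 29)
		printf("attribute 242 not found\n");
	return 0;
}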
@@ -1504,10 +1838,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
1504 fis.cyl_hi = command[5]; 1838 fis.cyl_hi = command[5];
1505 fis.device = command[6] & ~0x10; /* Clear the dev bit*/ 1839 fis.device = command[6] & ~0x10; /* Clear the dev bit*/
1506 1840
1507 1841 dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
1508 dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, "
1509 "nsect %x, sect %x, lcyl %x, "
1510 "hcyl %x, sel %x\n",
1511 __func__, 1842 __func__,
1512 command[0], 1843 command[0],
1513 command[1], 1844 command[1],
@@ -1534,8 +1865,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
1534 command[4] = reply->cyl_low; 1865 command[4] = reply->cyl_low;
1535 command[5] = reply->cyl_hi; 1866 command[5] = reply->cyl_hi;
1536 1867
1537 dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, " 1868 dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
1538 "err %x , cyl_lo %x cyl_hi %x\n",
1539 __func__, 1869 __func__,
1540 command[0], 1870 command[0],
1541 command[1], 1871 command[1],
@@ -1562,13 +1892,33 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
1562 void __user *user_buffer) 1892 void __user *user_buffer)
1563{ 1893{
1564 struct host_to_dev_fis fis; 1894 struct host_to_dev_fis fis;
1565 struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); 1895 struct host_to_dev_fis *reply;
1896 u8 *buf = NULL;
1897 dma_addr_t dma_addr = 0;
1898 int rv = 0, xfer_sz = command[3];
1899
1900 if (xfer_sz) {
1901 if (user_buffer)
1902 return -EFAULT;
1903
1904 buf = dmam_alloc_coherent(&port->dd->pdev->dev,
1905 ATA_SECT_SIZE * xfer_sz,
1906 &dma_addr,
1907 GFP_KERNEL);
1908 if (!buf) {
1909 dev_err(&port->dd->pdev->dev,
1910 "Memory allocation failed (%d bytes)\n",
1911 ATA_SECT_SIZE * xfer_sz);
1912 return -ENOMEM;
1913 }
1914 memset(buf, 0, ATA_SECT_SIZE * xfer_sz);
1915 }
1566 1916
1567 /* Build the FIS. */ 1917 /* Build the FIS. */
1568 memset(&fis, 0, sizeof(struct host_to_dev_fis)); 1918 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1569 fis.type = 0x27; 1919 fis.type = 0x27;
1570 fis.opts = 1 << 7; 1920 fis.opts = 1 << 7;
1571 fis.command = command[0]; 1921 fis.command = command[0];
1572 fis.features = command[2]; 1922 fis.features = command[2];
1573 fis.sect_count = command[3]; 1923 fis.sect_count = command[3];
1574 if (fis.command == ATA_CMD_SMART) { 1924 if (fis.command == ATA_CMD_SMART) {
@@ -1577,8 +1927,13 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
1577 fis.cyl_hi = 0xC2; 1927 fis.cyl_hi = 0xC2;
1578 } 1928 }
1579 1929
1930 if (xfer_sz)
1931 reply = (port->rxfis + RX_FIS_PIO_SETUP);
1932 else
1933 reply = (port->rxfis + RX_FIS_D2H_REG);
1934
1580 dbg_printk(MTIP_DRV_NAME 1935 dbg_printk(MTIP_DRV_NAME
1581 "%s: User Command: cmd %x, sect %x, " 1936 " %s: User Command: cmd %x, sect %x, "
1582 "feat %x, sectcnt %x\n", 1937 "feat %x, sectcnt %x\n",
1583 __func__, 1938 __func__,
1584 command[0], 1939 command[0],
@@ -1586,43 +1941,46 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
1586 command[2], 1941 command[2],
1587 command[3]); 1942 command[3]);
1588 1943
1589 memset(port->sector_buffer, 0x00, ATA_SECT_SIZE);
1590
1591 /* Execute the command. */ 1944 /* Execute the command. */
1592 if (mtip_exec_internal_command(port, 1945 if (mtip_exec_internal_command(port,
1593 &fis, 1946 &fis,
1594 5, 1947 5,
1595 port->sector_buffer_dma, 1948 (xfer_sz ? dma_addr : 0),
1596 (command[3] != 0) ? ATA_SECT_SIZE : 0, 1949 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
1597 0, 1950 0,
1598 GFP_KERNEL, 1951 GFP_KERNEL,
1599 MTIP_IOCTL_COMMAND_TIMEOUT_MS) 1952 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
1600 < 0) { 1953 < 0) {
1601 return -1; 1954 rv = -EFAULT;
1955 goto exit_drive_command;
1602 } 1956 }
1603 1957
1604 /* Collect the completion status. */ 1958 /* Collect the completion status. */
1605 command[0] = reply->command; /* Status*/ 1959 command[0] = reply->command; /* Status*/
1606 command[1] = reply->features; /* Error*/ 1960 command[1] = reply->features; /* Error*/
1607 command[2] = command[3]; 1961 command[2] = reply->sect_count;
1608 1962
1609 dbg_printk(MTIP_DRV_NAME 1963 dbg_printk(MTIP_DRV_NAME
1610 "%s: Completion Status: stat %x, " 1964 " %s: Completion Status: stat %x, "
1611 "err %x, cmd %x\n", 1965 "err %x, nsect %x\n",
1612 __func__, 1966 __func__,
1613 command[0], 1967 command[0],
1614 command[1], 1968 command[1],
1615 command[2]); 1969 command[2]);
1616 1970
1617 if (user_buffer && command[3]) { 1971 if (xfer_sz) {
1618 if (copy_to_user(user_buffer, 1972 if (copy_to_user(user_buffer,
1619 port->sector_buffer, 1973 buf,
1620 ATA_SECT_SIZE * command[3])) { 1974 ATA_SECT_SIZE * command[3])) {
1621 return -EFAULT; 1975 rv = -EFAULT;
1976 goto exit_drive_command;
1622 } 1977 }
1623 } 1978 }
1624 1979exit_drive_command:
1625 return 0; 1980 if (buf)
1981 dmam_free_coherent(&port->dd->pdev->dev,
1982 ATA_SECT_SIZE * xfer_sz, buf, dma_addr);
1983 return rv;
1626} 1984}
1627 1985
1628/* 1986/*
@@ -1672,6 +2030,32 @@ static unsigned int implicit_sector(unsigned char command,
1672 return rv; 2030 return rv;
1673} 2031}
1674 2032
2033static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout)
2034{
2035 switch (fis->command) {
2036 case ATA_CMD_DOWNLOAD_MICRO:
2037 *timeout = 120000; /* 2 minutes */
2038 break;
2039 case ATA_CMD_SEC_ERASE_UNIT:
2040 case 0xFC:
2041 *timeout = 240000; /* 4 minutes */
2042 break;
2043 case ATA_CMD_STANDBYNOW1:
2044 *timeout = 10000; /* 10 seconds */
2045 break;
2046 case 0xF7:
2047 case 0xFA:
2048 *timeout = 60000; /* 60 seconds */
2049 break;
2050 case ATA_CMD_SMART:
2051 *timeout = 15000; /* 15 seconds */
2052 break;
2053 default:
2054 *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
2055 break;
2056 }
2057}
2058
1675/* 2059/*
1676 * Executes a taskfile 2060 * Executes a taskfile
1677 * See ide_taskfile_ioctl() for derivation 2061 * See ide_taskfile_ioctl() for derivation
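For reference, a standalone sketch (not part of the patch) of the per-opcode timeout table that the new mtip_set_timeout() helper centralizes. The numeric opcodes stand in for the ATA_CMD_* constants from <linux/ata.h> (0x92, 0xF4, 0xE0, 0xB0), and the 5000 ms default stands in for MTIP_IOCTL_COMMAND_TIMEOUT_MS; both substitutions are assumptions made so the sketch compiles outside the kernel.

#include <stdio.h>

/* Per-opcode timeouts, in milliseconds, following mtip_set_timeout(). */
static unsigned int pick_timeout_ms(unsigned char ata_cmd)
{
	switch (ata_cmd) {
	case 0x92:		/* DOWNLOAD MICROCODE: 2 minutes */
		return 120000;
	case 0xF4:		/* SECURITY ERASE UNIT */
	case 0xFC:		/* vendor low-level format: 4 minutes */
		return 240000;
	case 0xE0:		/* STANDBY IMMEDIATE: 10 seconds */
		return 10000;
	case 0xF7:
	case 0xFA:		/* vendor unique: 60 seconds */
		return 60000;
	case 0xB0:		/* SMART: 15 seconds */
		return 15000;
	default:		/* assumed ioctl default of 5 seconds */
		return 5000;
	}
}

int main(void)
{
	printf("secure erase timeout: %u ms\n", pick_timeout_ms(0xF4));
	printf("smart timeout: %u ms\n", pick_timeout_ms(0xB0));
	return 0;
}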
@@ -1692,7 +2076,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
1692 unsigned int taskin = 0; 2076 unsigned int taskin = 0;
1693 unsigned int taskout = 0; 2077 unsigned int taskout = 0;
1694 u8 nsect = 0; 2078 u8 nsect = 0;
1695 unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; 2079 unsigned int timeout;
1696 unsigned int force_single_sector; 2080 unsigned int force_single_sector;
1697 unsigned int transfer_size; 2081 unsigned int transfer_size;
1698 unsigned long task_file_data; 2082 unsigned long task_file_data;
@@ -1810,9 +2194,10 @@ static int exec_drive_taskfile(struct driver_data *dd,
1810 } 2194 }
1811 2195
1812 dbg_printk(MTIP_DRV_NAME 2196 dbg_printk(MTIP_DRV_NAME
1813 "taskfile: cmd %x, feat %x, nsect %x," 2197 " %s: cmd %x, feat %x, nsect %x,"
1814 " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x," 2198 " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
1815 " head/dev %x\n", 2199 " head/dev %x\n",
2200 __func__,
1816 fis.command, 2201 fis.command,
1817 fis.features, 2202 fis.features,
1818 fis.sect_count, 2203 fis.sect_count,
@@ -1821,32 +2206,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
1821 fis.lba_hi, 2206 fis.lba_hi,
1822 fis.device); 2207 fis.device);
1823 2208
1824 switch (fis.command) { 2209 mtip_set_timeout(&fis, &timeout);
1825 case ATA_CMD_DOWNLOAD_MICRO:
1826 /* Change timeout for Download Microcode to 60 seconds.*/
1827 timeout = 60000;
1828 break;
1829 case ATA_CMD_SEC_ERASE_UNIT:
1830 /* Change timeout for Security Erase Unit to 4 minutes.*/
1831 timeout = 240000;
1832 break;
1833 case ATA_CMD_STANDBYNOW1:
1834 /* Change timeout for standby immediate to 10 seconds.*/
1835 timeout = 10000;
1836 break;
1837 case 0xF7:
1838 case 0xFA:
1839 /* Change timeout for vendor unique command to 10 secs */
1840 timeout = 10000;
1841 break;
1842 case ATA_CMD_SMART:
1843 /* Change timeout for vendor unique command to 10 secs */
1844 timeout = 10000;
1845 break;
1846 default:
1847 timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
1848 break;
1849 }
1850 2210
1851 /* Determine the correct transfer size.*/ 2211 /* Determine the correct transfer size.*/
1852 if (force_single_sector) 2212 if (force_single_sector)
@@ -1903,18 +2263,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
1903 req_task->hob_ports[1] = reply->features_ex; 2263 req_task->hob_ports[1] = reply->features_ex;
1904 req_task->hob_ports[2] = reply->sect_cnt_ex; 2264 req_task->hob_ports[2] = reply->sect_cnt_ex;
1905 } 2265 }
1906
1907 /* Com rest after secure erase or lowlevel format */
1908 if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
1909 ((fis.command == 0xFC) &&
1910 (fis.features == 0x27 || fis.features == 0x72 ||
1911 fis.features == 0x62 || fis.features == 0x26))) &&
1912 !(reply->command & 1)) {
1913 mtip_restart_port(dd->port);
1914 }
1915
1916 dbg_printk(MTIP_DRV_NAME 2266 dbg_printk(MTIP_DRV_NAME
1917 "%s: Completion: stat %x," 2267 " %s: Completion: stat %x,"
1918 "err %x, sect_cnt %x, lbalo %x," 2268 "err %x, sect_cnt %x, lbalo %x,"
1919 "lbamid %x, lbahi %x, dev %x\n", 2269 "lbamid %x, lbahi %x, dev %x\n",
1920 __func__, 2270 __func__,
@@ -1973,13 +2323,12 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
1973{ 2323{
1974 switch (cmd) { 2324 switch (cmd) {
1975 case HDIO_GET_IDENTITY: 2325 case HDIO_GET_IDENTITY:
1976 if (mtip_get_identify(dd->port, (void __user *) arg) < 0) { 2326 {
1977 dev_warn(&dd->pdev->dev, 2327 if (copy_to_user((void __user *)arg, dd->port->identify,
1978 "Unable to read identity\n"); 2328 sizeof(u16) * ATA_ID_WORDS))
1979 return -EIO; 2329 return -EFAULT;
1980 }
1981
1982 break; 2330 break;
2331 }
1983 case HDIO_DRIVE_CMD: 2332 case HDIO_DRIVE_CMD:
1984 { 2333 {
1985 u8 drive_command[4]; 2334 u8 drive_command[4];
@@ -2080,14 +2429,10 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
2080 struct host_to_dev_fis *fis; 2429 struct host_to_dev_fis *fis;
2081 struct mtip_port *port = dd->port; 2430 struct mtip_port *port = dd->port;
2082 struct mtip_cmd *command = &port->commands[tag]; 2431 struct mtip_cmd *command = &port->commands[tag];
2432 int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
2083 2433
2084 /* Map the scatter list for DMA access */ 2434 /* Map the scatter list for DMA access */
2085 if (dir == READ) 2435 nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
2086 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2087 nents, DMA_FROM_DEVICE);
2088 else
2089 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2090 nents, DMA_TO_DEVICE);
2091 2436
2092 command->scatter_ents = nents; 2437 command->scatter_ents = nents;
2093 2438
@@ -2127,7 +2472,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
2127 */ 2472 */
2128 command->comp_data = dd; 2473 command->comp_data = dd;
2129 command->comp_func = mtip_async_complete; 2474 command->comp_func = mtip_async_complete;
2130 command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 2475 command->direction = dma_dir;
2131 2476
2132 /* 2477 /*
2133 * Set the completion function and data for the command passed 2478 * Set the completion function and data for the command passed
@@ -2140,19 +2485,16 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
2140 * To prevent this command from being issued 2485 * To prevent this command from being issued
2141 * if an internal command is in progress or error handling is active. 2486 * if an internal command is in progress or error handling is active.
2142 */ 2487 */
2143 if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) || 2488 if (port->flags & MTIP_PF_PAUSE_IO) {
2144 test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
2145 set_bit(tag, port->cmds_to_issue); 2489 set_bit(tag, port->cmds_to_issue);
2146 set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); 2490 set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2147 return; 2491 return;
2148 } 2492 }
2149 2493
2150 /* Issue the command to the hardware */ 2494 /* Issue the command to the hardware */
2151 mtip_issue_ncq_command(port, tag); 2495 mtip_issue_ncq_command(port, tag);
2152 2496
2153 /* Set the command's timeout value.*/ 2497 return;
2154 port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
2155 MTIP_NCQ_COMMAND_TIMEOUT_MS);
2156} 2498}
2157 2499
2158/* 2500/*
@@ -2191,8 +2533,14 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2191 down(&dd->port->cmd_slot); 2533 down(&dd->port->cmd_slot);
2192 *tag = get_slot(dd->port); 2534 *tag = get_slot(dd->port);
2193 2535
2194 if (unlikely(*tag < 0)) 2536 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
2537 up(&dd->port->cmd_slot);
2538 return NULL;
2539 }
2540 if (unlikely(*tag < 0)) {
2541 up(&dd->port->cmd_slot);
2195 return NULL; 2542 return NULL;
2543 }
2196 2544
2197 return dd->port->commands[*tag].sg; 2545 return dd->port->commands[*tag].sg;
2198} 2546}
@@ -2207,7 +2555,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2207 * return value 2555 * return value
2208 * The size, in bytes, of the data copied into buf. 2556 * The size, in bytes, of the data copied into buf.
2209 */ 2557 */
2210static ssize_t hw_show_registers(struct device *dev, 2558static ssize_t mtip_hw_show_registers(struct device *dev,
2211 struct device_attribute *attr, 2559 struct device_attribute *attr,
2212 char *buf) 2560 char *buf)
2213{ 2561{
@@ -2216,44 +2564,97 @@ static ssize_t hw_show_registers(struct device *dev,
2216 int size = 0; 2564 int size = 0;
2217 int n; 2565 int n;
2218 2566
2219 size += sprintf(&buf[size], "%s:\ns_active:\n", __func__); 2567 size += sprintf(&buf[size], "Hardware\n--------\n");
2568 size += sprintf(&buf[size], "S ACTive : [ 0x");
2220 2569
2221 for (n = 0; n < dd->slot_groups; n++) 2570 for (n = dd->slot_groups-1; n >= 0; n--)
2222 size += sprintf(&buf[size], "0x%08x\n", 2571 size += sprintf(&buf[size], "%08X ",
2223 readl(dd->port->s_active[n])); 2572 readl(dd->port->s_active[n]));
2224 2573
2225 size += sprintf(&buf[size], "Command Issue:\n"); 2574 size += sprintf(&buf[size], "]\n");
2575 size += sprintf(&buf[size], "Command Issue : [ 0x");
2226 2576
2227 for (n = 0; n < dd->slot_groups; n++) 2577 for (n = dd->slot_groups-1; n >= 0; n--)
2228 size += sprintf(&buf[size], "0x%08x\n", 2578 size += sprintf(&buf[size], "%08X ",
2229 readl(dd->port->cmd_issue[n])); 2579 readl(dd->port->cmd_issue[n]));
2230 2580
2231 size += sprintf(&buf[size], "Allocated:\n"); 2581 size += sprintf(&buf[size], "]\n");
2582 size += sprintf(&buf[size], "Completed : [ 0x");
2583
2584 for (n = dd->slot_groups-1; n >= 0; n--)
2585 size += sprintf(&buf[size], "%08X ",
2586 readl(dd->port->completed[n]));
2587
2588 size += sprintf(&buf[size], "]\n");
2589 size += sprintf(&buf[size], "PORT IRQ STAT : [ 0x%08X ]\n",
2590 readl(dd->port->mmio + PORT_IRQ_STAT));
2591 size += sprintf(&buf[size], "HOST IRQ STAT : [ 0x%08X ]\n",
2592 readl(dd->mmio + HOST_IRQ_STAT));
2593 size += sprintf(&buf[size], "\n");
2594
2595 size += sprintf(&buf[size], "Local\n-----\n");
2596 size += sprintf(&buf[size], "Allocated : [ 0x");
2232 2597
2233 for (n = 0; n < dd->slot_groups; n++) { 2598 for (n = dd->slot_groups-1; n >= 0; n--) {
2234 if (sizeof(long) > sizeof(u32)) 2599 if (sizeof(long) > sizeof(u32))
2235 group_allocated = 2600 group_allocated =
2236 dd->port->allocated[n/2] >> (32*(n&1)); 2601 dd->port->allocated[n/2] >> (32*(n&1));
2237 else 2602 else
2238 group_allocated = dd->port->allocated[n]; 2603 group_allocated = dd->port->allocated[n];
2239 size += sprintf(&buf[size], "0x%08x\n", 2604 size += sprintf(&buf[size], "%08X ", group_allocated);
2240 group_allocated);
2241 } 2605 }
2606 size += sprintf(&buf[size], "]\n");
2242 2607
2243 size += sprintf(&buf[size], "completed:\n"); 2608 size += sprintf(&buf[size], "Commands in Q: [ 0x");
2244 2609
2245 for (n = 0; n < dd->slot_groups; n++) 2610 for (n = dd->slot_groups-1; n >= 0; n--) {
2246 size += sprintf(&buf[size], "0x%08x\n", 2611 if (sizeof(long) > sizeof(u32))
2247 readl(dd->port->completed[n])); 2612 group_allocated =
2613 dd->port->cmds_to_issue[n/2] >> (32*(n&1));
2614 else
2615 group_allocated = dd->port->cmds_to_issue[n];
2616 size += sprintf(&buf[size], "%08X ", group_allocated);
2617 }
2618 size += sprintf(&buf[size], "]\n");
2248 2619
2249 size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n", 2620 return size;
2250 readl(dd->port->mmio + PORT_IRQ_STAT)); 2621}
2251 size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n", 2622
2252 readl(dd->mmio + HOST_IRQ_STAT)); 2623static ssize_t mtip_hw_show_status(struct device *dev,
2624 struct device_attribute *attr,
2625 char *buf)
2626{
2627 struct driver_data *dd = dev_to_disk(dev)->private_data;
2628 int size = 0;
2629
2630 if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
2631 size += sprintf(buf, "%s", "thermal_shutdown\n");
2632 else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
2633 size += sprintf(buf, "%s", "write_protect\n");
2634 else
2635 size += sprintf(buf, "%s", "online\n");
2636
2637 return size;
2638}
2639
2640static ssize_t mtip_hw_show_flags(struct device *dev,
2641 struct device_attribute *attr,
2642 char *buf)
2643{
2644 struct driver_data *dd = dev_to_disk(dev)->private_data;
2645 int size = 0;
2646
2647 size += sprintf(&buf[size], "Flag in port struct : [ %08lX ]\n",
2648 dd->port->flags);
2649 size += sprintf(&buf[size], "Flag in dd struct : [ %08lX ]\n",
2650 dd->dd_flag);
2253 2651
2254 return size; 2652 return size;
2255} 2653}
2256static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL); 2654
2655static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL);
2656static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
2657static DEVICE_ATTR(flags, S_IRUGO, mtip_hw_show_flags, NULL);
2257 2658
2258/* 2659/*
2259 * Create the sysfs related attributes. 2660 * Create the sysfs related attributes.
@@ -2272,7 +2673,13 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
2272 2673
2273 if (sysfs_create_file(kobj, &dev_attr_registers.attr)) 2674 if (sysfs_create_file(kobj, &dev_attr_registers.attr))
2274 dev_warn(&dd->pdev->dev, 2675 dev_warn(&dd->pdev->dev,
2275 "Error creating registers sysfs entry\n"); 2676 "Error creating 'registers' sysfs entry\n");
2677 if (sysfs_create_file(kobj, &dev_attr_status.attr))
2678 dev_warn(&dd->pdev->dev,
2679 "Error creating 'status' sysfs entry\n");
2680 if (sysfs_create_file(kobj, &dev_attr_flags.attr))
2681 dev_warn(&dd->pdev->dev,
2682 "Error creating 'flags' sysfs entry\n");
2276 return 0; 2683 return 0;
2277} 2684}
2278 2685
@@ -2292,6 +2699,8 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
2292 return -EINVAL; 2699 return -EINVAL;
2293 2700
2294 sysfs_remove_file(kobj, &dev_attr_registers.attr); 2701 sysfs_remove_file(kobj, &dev_attr_registers.attr);
2702 sysfs_remove_file(kobj, &dev_attr_status.attr);
2703 sysfs_remove_file(kobj, &dev_attr_flags.attr);
2295 2704
2296 return 0; 2705 return 0;
2297} 2706}
@@ -2384,10 +2793,12 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
2384 "FTL rebuild in progress. Polling for completion.\n"); 2793 "FTL rebuild in progress. Polling for completion.\n");
2385 2794
2386 start = jiffies; 2795 start = jiffies;
2387 dd->ftlrebuildflag = 1;
2388 timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS); 2796 timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
2389 2797
2390 do { 2798 do {
2799 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
2800 &dd->dd_flag)))
2801 return -EFAULT;
2391 if (mtip_check_surprise_removal(dd->pdev)) 2802 if (mtip_check_surprise_removal(dd->pdev))
2392 return -EFAULT; 2803 return -EFAULT;
2393 2804
@@ -2408,22 +2819,17 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
2408 dev_warn(&dd->pdev->dev, 2819 dev_warn(&dd->pdev->dev,
2409 "FTL rebuild complete (%d secs).\n", 2820 "FTL rebuild complete (%d secs).\n",
2410 jiffies_to_msecs(jiffies - start) / 1000); 2821 jiffies_to_msecs(jiffies - start) / 1000);
2411 dd->ftlrebuildflag = 0;
2412 mtip_block_initialize(dd); 2822 mtip_block_initialize(dd);
2413 break; 2823 return 0;
2414 } 2824 }
2415 ssleep(10); 2825 ssleep(10);
2416 } while (time_before(jiffies, timeout)); 2826 } while (time_before(jiffies, timeout));
2417 2827
2418 /* Check for timeout */ 2828 /* Check for timeout */
2419 if (dd->ftlrebuildflag) { 2829 dev_err(&dd->pdev->dev,
2420 dev_err(&dd->pdev->dev,
2421 "Timed out waiting for FTL rebuild to complete (%d secs).\n", 2830 "Timed out waiting for FTL rebuild to complete (%d secs).\n",
2422 jiffies_to_msecs(jiffies - start) / 1000); 2831 jiffies_to_msecs(jiffies - start) / 1000);
2423 return -EFAULT; 2832 return -EFAULT;
2424 }
2425
2426 return 0;
2427} 2833}
2428 2834
2429/* 2835/*
@@ -2448,14 +2854,17 @@ static int mtip_service_thread(void *data)
2448 * is in progress nor error handling is active 2854 * is in progress nor error handling is active
2449 */ 2855 */
2450 wait_event_interruptible(port->svc_wait, (port->flags) && 2856 wait_event_interruptible(port->svc_wait, (port->flags) &&
2451 !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && 2857 !(port->flags & MTIP_PF_PAUSE_IO));
2452 !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
2453 2858
2454 if (kthread_should_stop()) 2859 if (kthread_should_stop())
2455 break; 2860 break;
2456 2861
2457 set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); 2862 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
2458 if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { 2863 &dd->dd_flag)))
2864 break;
2865
2866 set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
2867 if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
2459 slot = 1; 2868 slot = 1;
2460 /* used to restrict the loop to one iteration */ 2869 /* used to restrict the loop to one iteration */
2461 slot_start = num_cmd_slots; 2870 slot_start = num_cmd_slots;
@@ -2480,21 +2889,19 @@ static int mtip_service_thread(void *data)
2480 /* Issue the command to the hardware */ 2889 /* Issue the command to the hardware */
2481 mtip_issue_ncq_command(port, slot); 2890 mtip_issue_ncq_command(port, slot);
2482 2891
2483 /* Set the command's timeout value.*/
2484 port->commands[slot].comp_time = jiffies +
2485 msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
2486
2487 clear_bit(slot, port->cmds_to_issue); 2892 clear_bit(slot, port->cmds_to_issue);
2488 } 2893 }
2489 2894
2490 clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); 2895 clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2491 } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) { 2896 } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
2492 mtip_ftl_rebuild_poll(dd); 2897 if (!mtip_ftl_rebuild_poll(dd))
2493 clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags); 2898 set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
2899 &dd->dd_flag);
2900 clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
2494 } 2901 }
2495 clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); 2902 clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
2496 2903
2497 if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags)) 2904 if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
2498 break; 2905 break;
2499 } 2906 }
2500 return 0; 2907 return 0;
@@ -2513,6 +2920,9 @@ static int mtip_hw_init(struct driver_data *dd)
2513 int i; 2920 int i;
2514 int rv; 2921 int rv;
2515 unsigned int num_command_slots; 2922 unsigned int num_command_slots;
2923 unsigned long timeout, timetaken;
2924 unsigned char *buf;
2925 struct smart_attr attr242;
2516 2926
2517 dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; 2927 dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
2518 2928
@@ -2547,7 +2957,7 @@ static int mtip_hw_init(struct driver_data *dd)
2547 /* Allocate memory for the command list. */ 2957 /* Allocate memory for the command list. */
2548 dd->port->command_list = 2958 dd->port->command_list =
2549 dmam_alloc_coherent(&dd->pdev->dev, 2959 dmam_alloc_coherent(&dd->pdev->dev,
2550 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), 2960 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
2551 &dd->port->command_list_dma, 2961 &dd->port->command_list_dma,
2552 GFP_KERNEL); 2962 GFP_KERNEL);
2553 if (!dd->port->command_list) { 2963 if (!dd->port->command_list) {
@@ -2560,7 +2970,7 @@ static int mtip_hw_init(struct driver_data *dd)
2560 /* Clear the memory we have allocated. */ 2970 /* Clear the memory we have allocated. */
2561 memset(dd->port->command_list, 2971 memset(dd->port->command_list,
2562 0, 2972 0,
2563 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2)); 2973 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
2564 2974
2565 /* Setup the addresse of the RX FIS. */ 2975 /* Setup the addresse of the RX FIS. */
2566 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ; 2976 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
@@ -2576,10 +2986,19 @@ static int mtip_hw_init(struct driver_data *dd)
2576 dd->port->identify_dma = dd->port->command_tbl_dma + 2986 dd->port->identify_dma = dd->port->command_tbl_dma +
2577 HW_CMD_TBL_AR_SZ; 2987 HW_CMD_TBL_AR_SZ;
2578 2988
2579 /* Setup the address of the sector buffer. */ 2989 /* Setup the address of the sector buffer - for some non-ncq cmds */
2580 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE; 2990 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
2581 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE; 2991 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
2582 2992
2993 /* Setup the address of the log buf - for read log command */
2994 dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
2995 dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
2996
2997 /* Setup the address of the smart buf - for smart read data command */
2998 dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
2999 dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3000
3001
2583 /* Point the command headers at the command tables. */ 3002 /* Point the command headers at the command tables. */
2584 for (i = 0; i < num_command_slots; i++) { 3003 for (i = 0; i < num_command_slots; i++) {
2585 dd->port->commands[i].command_header = 3004 dd->port->commands[i].command_header =
@@ -2623,14 +3042,43 @@ static int mtip_hw_init(struct driver_data *dd)
2623 dd->port->mmio + i*0x80 + PORT_SDBV; 3042 dd->port->mmio + i*0x80 + PORT_SDBV;
2624 } 3043 }
2625 3044
2626 /* Reset the HBA. */ 3045 timetaken = jiffies;
2627 if (mtip_hba_reset(dd) < 0) { 3046 timeout = jiffies + msecs_to_jiffies(30000);
2628 dev_err(&dd->pdev->dev, 3047 while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
2629 "Card did not reset within timeout\n"); 3048 time_before(jiffies, timeout)) {
2630 rv = -EIO; 3049 mdelay(100);
3050 }
3051 if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
3052 timetaken = jiffies - timetaken;
3053 dev_warn(&dd->pdev->dev,
3054 "Surprise removal detected at %u ms\n",
3055 jiffies_to_msecs(timetaken));
3056 rv = -ENODEV;
 3057 		goto out2;
3058 }
3059 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
3060 timetaken = jiffies - timetaken;
3061 dev_warn(&dd->pdev->dev,
3062 "Removal detected at %u ms\n",
3063 jiffies_to_msecs(timetaken));
3064 rv = -EFAULT;
2631 goto out2; 3065 goto out2;
2632 } 3066 }
2633 3067
3068 /* Conditionally reset the HBA. */
3069 if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
3070 if (mtip_hba_reset(dd) < 0) {
3071 dev_err(&dd->pdev->dev,
3072 "Card did not reset within timeout\n");
3073 rv = -EIO;
3074 goto out2;
3075 }
3076 } else {
3077 /* Clear any pending interrupts on the HBA */
3078 writel(readl(dd->mmio + HOST_IRQ_STAT),
3079 dd->mmio + HOST_IRQ_STAT);
3080 }
3081
2634 mtip_init_port(dd->port); 3082 mtip_init_port(dd->port);
2635 mtip_start_port(dd->port); 3083 mtip_start_port(dd->port);
2636 3084
@@ -2660,6 +3108,12 @@ static int mtip_hw_init(struct driver_data *dd)
2660 mod_timer(&dd->port->cmd_timer, 3108 mod_timer(&dd->port->cmd_timer,
2661 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); 3109 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
2662 3110
3111
3112 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
3113 rv = -EFAULT;
3114 goto out3;
3115 }
3116
2663 if (mtip_get_identify(dd->port, NULL) < 0) { 3117 if (mtip_get_identify(dd->port, NULL) < 0) {
2664 rv = -EFAULT; 3118 rv = -EFAULT;
2665 goto out3; 3119 goto out3;
@@ -2667,10 +3121,47 @@ static int mtip_hw_init(struct driver_data *dd)
2667 3121
2668 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == 3122 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2669 MTIP_FTL_REBUILD_MAGIC) { 3123 MTIP_FTL_REBUILD_MAGIC) {
2670 set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags); 3124 set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
2671 return MTIP_FTL_REBUILD_MAGIC; 3125 return MTIP_FTL_REBUILD_MAGIC;
2672 } 3126 }
2673 mtip_dump_identify(dd->port); 3127 mtip_dump_identify(dd->port);
3128
3129 /* check write protect, over temp and rebuild statuses */
3130 rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
3131 dd->port->log_buf,
3132 dd->port->log_buf_dma, 1);
3133 if (rv) {
3134 dev_warn(&dd->pdev->dev,
3135 "Error in READ LOG EXT (10h) command\n");
3136 /* non-critical error, don't fail the load */
3137 } else {
3138 buf = (unsigned char *)dd->port->log_buf;
3139 if (buf[259] & 0x1) {
3140 dev_info(&dd->pdev->dev,
3141 "Write protect bit is set.\n");
3142 set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
3143 }
3144 if (buf[288] == 0xF7) {
3145 dev_info(&dd->pdev->dev,
3146 "Exceeded Tmax, drive in thermal shutdown.\n");
3147 set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
3148 }
3149 if (buf[288] == 0xBF) {
3150 dev_info(&dd->pdev->dev,
3151 "Drive indicates rebuild has failed.\n");
3152 /* TODO */
3153 }
3154 }
3155
3156 /* get write protect progress */
3157 memset(&attr242, 0, sizeof(struct smart_attr));
3158 if (mtip_get_smart_attr(dd->port, 242, &attr242))
3159 dev_warn(&dd->pdev->dev,
3160 "Unable to check write protect progress\n");
3161 else
3162 dev_info(&dd->pdev->dev,
3163 "Write protect progress: %d%% (%d blocks)\n",
3164 attr242.cur, attr242.data);
2674 return rv; 3165 return rv;
2675 3166
2676out3: 3167out3:
@@ -2688,7 +3179,7 @@ out2:
2688 3179
2689 /* Free the command/command header memory. */ 3180 /* Free the command/command header memory. */
2690 dmam_free_coherent(&dd->pdev->dev, 3181 dmam_free_coherent(&dd->pdev->dev,
2691 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), 3182 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
2692 dd->port->command_list, 3183 dd->port->command_list,
2693 dd->port->command_list_dma); 3184 dd->port->command_list_dma);
2694out1: 3185out1:
@@ -2712,9 +3203,12 @@ static int mtip_hw_exit(struct driver_data *dd)
2712 * Send standby immediate (E0h) to the drive so that it 3203 * Send standby immediate (E0h) to the drive so that it
2713 * saves its state. 3204 * saves its state.
2714 */ 3205 */
2715 if (atomic_read(&dd->drv_cleanup_done) != true) { 3206 if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
2716 3207
2717 mtip_standby_immediate(dd->port); 3208 if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags))
3209 if (mtip_standby_immediate(dd->port))
3210 dev_warn(&dd->pdev->dev,
3211 "STANDBY IMMEDIATE failed\n");
2718 3212
2719 /* de-initialize the port. */ 3213 /* de-initialize the port. */
2720 mtip_deinit_port(dd->port); 3214 mtip_deinit_port(dd->port);
@@ -2734,7 +3228,7 @@ static int mtip_hw_exit(struct driver_data *dd)
2734 3228
2735 /* Free the command/command header memory. */ 3229 /* Free the command/command header memory. */
2736 dmam_free_coherent(&dd->pdev->dev, 3230 dmam_free_coherent(&dd->pdev->dev,
2737 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), 3231 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
2738 dd->port->command_list, 3232 dd->port->command_list,
2739 dd->port->command_list_dma); 3233 dd->port->command_list_dma);
2740 /* Free the memory allocated for the for structure. */ 3234 /* Free the memory allocated for the for structure. */
@@ -2892,6 +3386,9 @@ static int mtip_block_ioctl(struct block_device *dev,
2892 if (!dd) 3386 if (!dd)
2893 return -ENOTTY; 3387 return -ENOTTY;
2894 3388
3389 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3390 return -ENOTTY;
3391
2895 switch (cmd) { 3392 switch (cmd) {
2896 case BLKFLSBUF: 3393 case BLKFLSBUF:
2897 return -ENOTTY; 3394 return -ENOTTY;
@@ -2927,6 +3424,9 @@ static int mtip_block_compat_ioctl(struct block_device *dev,
2927 if (!dd) 3424 if (!dd)
2928 return -ENOTTY; 3425 return -ENOTTY;
2929 3426
3427 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3428 return -ENOTTY;
3429
2930 switch (cmd) { 3430 switch (cmd) {
2931 case BLKFLSBUF: 3431 case BLKFLSBUF:
2932 return -ENOTTY; 3432 return -ENOTTY;
@@ -3049,6 +3549,24 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3049 int nents = 0; 3549 int nents = 0;
3050 int tag = 0; 3550 int tag = 0;
3051 3551
3552 if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
3553 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3554 &dd->dd_flag))) {
3555 bio_endio(bio, -ENXIO);
3556 return;
3557 }
3558 if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
3559 bio_endio(bio, -ENODATA);
3560 return;
3561 }
3562 if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
3563 &dd->dd_flag) &&
3564 bio_data_dir(bio))) {
3565 bio_endio(bio, -ENODATA);
3566 return;
3567 }
3568 }
3569
3052 if (unlikely(!bio_has_data(bio))) { 3570 if (unlikely(!bio_has_data(bio))) {
3053 blk_queue_flush(queue, 0); 3571 blk_queue_flush(queue, 0);
3054 bio_endio(bio, 0); 3572 bio_endio(bio, 0);
@@ -3061,7 +3579,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3061 3579
3062 if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { 3580 if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
3063 dev_warn(&dd->pdev->dev, 3581 dev_warn(&dd->pdev->dev,
3064 "Maximum number of SGL entries exceeded"); 3582 "Maximum number of SGL entries exceeded\n");
3065 bio_io_error(bio); 3583 bio_io_error(bio);
3066 mtip_hw_release_scatterlist(dd, tag); 3584 mtip_hw_release_scatterlist(dd, tag);
3067 return; 3585 return;
@@ -3181,7 +3699,10 @@ skip_create_disk:
3181 set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); 3699 set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
3182 blk_queue_max_segments(dd->queue, MTIP_MAX_SG); 3700 blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
3183 blk_queue_physical_block_size(dd->queue, 4096); 3701 blk_queue_physical_block_size(dd->queue, 4096);
3702 blk_queue_max_hw_sectors(dd->queue, 0xffff);
3703 blk_queue_max_segment_size(dd->queue, 0x400000);
3184 blk_queue_io_min(dd->queue, 4096); 3704 blk_queue_io_min(dd->queue, 4096);
3705
3185 /* 3706 /*
3186 * write back cache is not supported in the device. FUA depends on 3707 * write back cache is not supported in the device. FUA depends on
3187 * write back cache support, hence setting flush support to zero. 3708 * write back cache support, hence setting flush support to zero.
@@ -3210,8 +3731,10 @@ skip_create_disk:
3210 kobject_put(kobj); 3731 kobject_put(kobj);
3211 } 3732 }
3212 3733
3213 if (dd->mtip_svc_handler) 3734 if (dd->mtip_svc_handler) {
3735 set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
3214 return rv; /* service thread created for handling rebuild */ 3736 return rv; /* service thread created for handling rebuild */
3737 }
3215 3738
3216start_service_thread: 3739start_service_thread:
3217 sprintf(thd_name, "mtip_svc_thd_%02d", index); 3740 sprintf(thd_name, "mtip_svc_thd_%02d", index);
@@ -3220,12 +3743,15 @@ start_service_thread:
3220 dd, thd_name); 3743 dd, thd_name);
3221 3744
3222 if (IS_ERR(dd->mtip_svc_handler)) { 3745 if (IS_ERR(dd->mtip_svc_handler)) {
3223 printk(KERN_ERR "mtip32xx: service thread failed to start\n"); 3746 dev_err(&dd->pdev->dev, "service thread failed to start\n");
3224 dd->mtip_svc_handler = NULL; 3747 dd->mtip_svc_handler = NULL;
3225 rv = -EFAULT; 3748 rv = -EFAULT;
3226 goto kthread_run_error; 3749 goto kthread_run_error;
3227 } 3750 }
3228 3751
3752 if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
3753 rv = wait_for_rebuild;
3754
3229 return rv; 3755 return rv;
3230 3756
3231kthread_run_error: 3757kthread_run_error:
@@ -3266,16 +3792,18 @@ static int mtip_block_remove(struct driver_data *dd)
3266 struct kobject *kobj; 3792 struct kobject *kobj;
3267 3793
3268 if (dd->mtip_svc_handler) { 3794 if (dd->mtip_svc_handler) {
3269 set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags); 3795 set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
3270 wake_up_interruptible(&dd->port->svc_wait); 3796 wake_up_interruptible(&dd->port->svc_wait);
3271 kthread_stop(dd->mtip_svc_handler); 3797 kthread_stop(dd->mtip_svc_handler);
3272 } 3798 }
3273 3799
3274 /* Clean up the sysfs attributes managed by the protocol layer. */ 3800 /* Clean up the sysfs attributes, if created */
3275 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); 3801 if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
3276 if (kobj) { 3802 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3277 mtip_hw_sysfs_exit(dd, kobj); 3803 if (kobj) {
3278 kobject_put(kobj); 3804 mtip_hw_sysfs_exit(dd, kobj);
3805 kobject_put(kobj);
3806 }
3279 } 3807 }
3280 3808
3281 /* 3809 /*
@@ -3283,6 +3811,11 @@ static int mtip_block_remove(struct driver_data *dd)
3283 * from /dev 3811 * from /dev
3284 */ 3812 */
3285 del_gendisk(dd->disk); 3813 del_gendisk(dd->disk);
3814
3815 spin_lock(&rssd_index_lock);
3816 ida_remove(&rssd_index_ida, dd->index);
3817 spin_unlock(&rssd_index_lock);
3818
3286 blk_cleanup_queue(dd->queue); 3819 blk_cleanup_queue(dd->queue);
3287 dd->disk = NULL; 3820 dd->disk = NULL;
3288 dd->queue = NULL; 3821 dd->queue = NULL;
@@ -3312,6 +3845,11 @@ static int mtip_block_shutdown(struct driver_data *dd)
3312 3845
3313 /* Delete our gendisk structure, and cleanup the blk queue. */ 3846 /* Delete our gendisk structure, and cleanup the blk queue. */
3314 del_gendisk(dd->disk); 3847 del_gendisk(dd->disk);
3848
3849 spin_lock(&rssd_index_lock);
3850 ida_remove(&rssd_index_ida, dd->index);
3851 spin_unlock(&rssd_index_lock);
3852
3315 blk_cleanup_queue(dd->queue); 3853 blk_cleanup_queue(dd->queue);
3316 dd->disk = NULL; 3854 dd->disk = NULL;
3317 dd->queue = NULL; 3855 dd->queue = NULL;
@@ -3359,11 +3897,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
3359 return -ENOMEM; 3897 return -ENOMEM;
3360 } 3898 }
3361 3899
3362 /* Set the atomic variable as 1 in case of SRSI */
3363 atomic_set(&dd->drv_cleanup_done, true);
3364
3365 atomic_set(&dd->resumeflag, false);
3366
3367 /* Attach the private data to this PCI device. */ 3900 /* Attach the private data to this PCI device. */
3368 pci_set_drvdata(pdev, dd); 3901 pci_set_drvdata(pdev, dd);
3369 3902
@@ -3420,7 +3953,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
3420 * instance number. 3953 * instance number.
3421 */ 3954 */
3422 instance++; 3955 instance++;
3423 3956 if (rv != MTIP_FTL_REBUILD_MAGIC)
3957 set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
3424 goto done; 3958 goto done;
3425 3959
3426block_initialize_err: 3960block_initialize_err:
@@ -3434,9 +3968,6 @@ iomap_err:
3434 pci_set_drvdata(pdev, NULL); 3968 pci_set_drvdata(pdev, NULL);
3435 return rv; 3969 return rv;
3436done: 3970done:
3437 /* Set the atomic variable as 0 in case of SRSI */
3438 atomic_set(&dd->drv_cleanup_done, true);
3439
3440 return rv; 3971 return rv;
3441} 3972}
3442 3973
@@ -3452,8 +3983,10 @@ static void mtip_pci_remove(struct pci_dev *pdev)
3452 struct driver_data *dd = pci_get_drvdata(pdev); 3983 struct driver_data *dd = pci_get_drvdata(pdev);
3453 int counter = 0; 3984 int counter = 0;
3454 3985
3986 set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
3987
3455 if (mtip_check_surprise_removal(pdev)) { 3988 if (mtip_check_surprise_removal(pdev)) {
3456 while (atomic_read(&dd->drv_cleanup_done) == false) { 3989 while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
3457 counter++; 3990 counter++;
3458 msleep(20); 3991 msleep(20);
3459 if (counter == 10) { 3992 if (counter == 10) {
@@ -3463,8 +3996,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
3463 } 3996 }
3464 } 3997 }
3465 } 3998 }
3466 /* Set the atomic variable as 1 in case of SRSI */
3467 atomic_set(&dd->drv_cleanup_done, true);
3468 3999
3469 /* Clean up the block layer. */ 4000 /* Clean up the block layer. */
3470 mtip_block_remove(dd); 4001 mtip_block_remove(dd);
@@ -3493,7 +4024,7 @@ static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
3493 return -EFAULT; 4024 return -EFAULT;
3494 } 4025 }
3495 4026
3496 atomic_set(&dd->resumeflag, true); 4027 set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
3497 4028
3498 /* Disable ports & interrupts then send standby immediate */ 4029 /* Disable ports & interrupts then send standby immediate */
3499 rv = mtip_block_suspend(dd); 4030 rv = mtip_block_suspend(dd);
@@ -3559,7 +4090,7 @@ static int mtip_pci_resume(struct pci_dev *pdev)
3559 dev_err(&pdev->dev, "Unable to resume\n"); 4090 dev_err(&pdev->dev, "Unable to resume\n");
3560 4091
3561err: 4092err:
3562 atomic_set(&dd->resumeflag, false); 4093 clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
3563 4094
3564 return rv; 4095 return rv;
3565} 4096}
@@ -3608,18 +4139,25 @@ MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
3608 */ 4139 */
3609static int __init mtip_init(void) 4140static int __init mtip_init(void)
3610{ 4141{
4142 int error;
4143
3611 printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); 4144 printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
3612 4145
3613 /* Allocate a major block device number to use with this driver. */ 4146 /* Allocate a major block device number to use with this driver. */
3614 mtip_major = register_blkdev(0, MTIP_DRV_NAME); 4147 error = register_blkdev(0, MTIP_DRV_NAME);
3615 if (mtip_major < 0) { 4148 if (error <= 0) {
3616 printk(KERN_ERR "Unable to register block device (%d)\n", 4149 printk(KERN_ERR "Unable to register block device (%d)\n",
3617 mtip_major); 4150 error);
3618 return -EBUSY; 4151 return -EBUSY;
3619 } 4152 }
4153 mtip_major = error;
3620 4154
3621 /* Register our PCI operations. */ 4155 /* Register our PCI operations. */
3622 return pci_register_driver(&mtip_pci_driver); 4156 error = pci_register_driver(&mtip_pci_driver);
4157 if (error)
4158 unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4159
4160 return error;
3623} 4161}
3624 4162
3625/* 4163/*
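The mtip_init() change at the end of this file replaces the old tail call to pci_register_driver() with explicit error handling: register_blkdev(0, ...) hands back the dynamically allocated major (or an error), and a failed pci_register_driver() now unregisters that major again instead of leaking it. A minimal sketch of the register-then-unwind pattern, using a hypothetical driver name and variables rather than the real ones:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pci.h>

static int example_major;
static struct pci_driver example_pci_driver;	/* assumed to be filled in elsewhere */

static int __init example_init(void)
{
	int error;

	/* register_blkdev(0, ...) returns a dynamically allocated major on success */
	error = register_blkdev(0, "example");
	if (error <= 0)
		return -EBUSY;
	example_major = error;

	/* if the second registration fails, undo the first before returning */
	error = pci_register_driver(&example_pci_driver);
	if (error)
		unregister_blkdev(example_major, "example");

	return error;
}
module_init(example_init);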
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e0554a8f2233..b2c88da26b2a 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -34,8 +34,8 @@
34/* offset of Device Control register in PCIe extended capabilities space */ 34/* offset of Device Control register in PCIe extended capabilities space */
35#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 35#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
36 36
37/* # of times to retry timed out IOs */ 37/* # of times to retry timed out/failed IOs */
38#define MTIP_MAX_RETRIES 5 38#define MTIP_MAX_RETRIES 2
39 39
40/* Various timeout values in ms */ 40/* Various timeout values in ms */
41#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000 41#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
@@ -113,13 +113,44 @@
113 113
114#define __force_bit2int (unsigned int __force) 114#define __force_bit2int (unsigned int __force)
115 115
116/* below are bit numbers in 'flags' defined in mtip_port */ 116enum {
117#define MTIP_FLAG_IC_ACTIVE_BIT 0 117 /* below are bit numbers in 'flags' defined in mtip_port */
118#define MTIP_FLAG_EH_ACTIVE_BIT 1 118 MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */
119#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2 119 MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */
120#define MTIP_FLAG_ISSUE_CMDS_BIT 4 120 MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */
121#define MTIP_FLAG_REBUILD_BIT 5 121 MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcode */
122#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8 122 MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \
123 (1 << MTIP_PF_EH_ACTIVE_BIT) | \
124 (1 << MTIP_PF_SE_ACTIVE_BIT) | \
125 (1 << MTIP_PF_DM_ACTIVE_BIT)),
126
127 MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
128 MTIP_PF_ISSUE_CMDS_BIT = 5,
129 MTIP_PF_REBUILD_BIT = 6,
130 MTIP_PF_SVC_THD_STOP_BIT = 8,
131
132 /* below are bit numbers in 'dd_flag' defined in driver_data */
133 MTIP_DDF_REMOVE_PENDING_BIT = 1,
134 MTIP_DDF_OVER_TEMP_BIT = 2,
135 MTIP_DDF_WRITE_PROTECT_BIT = 3,
136 MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \
137 (1 << MTIP_DDF_OVER_TEMP_BIT) | \
138 (1 << MTIP_DDF_WRITE_PROTECT_BIT)),
139
140 MTIP_DDF_CLEANUP_BIT = 5,
141 MTIP_DDF_RESUME_BIT = 6,
142 MTIP_DDF_INIT_DONE_BIT = 7,
143 MTIP_DDF_REBUILD_FAILED_BIT = 8,
144};
145
146__packed struct smart_attr{
147 u8 attr_id;
148 u16 flags;
149 u8 cur;
150 u8 worst;
151 u32 data;
152 u8 res[3];
153};
123 154
124/* Register Frame Information Structure (FIS), host to device. */ 155/* Register Frame Information Structure (FIS), host to device. */
125struct host_to_dev_fis { 156struct host_to_dev_fis {
@@ -345,6 +376,12 @@ struct mtip_port {
345 * when the command slot and all associated data structures 376 * when the command slot and all associated data structures
346 * are no longer needed. 377 * are no longer needed.
347 */ 378 */
379 u16 *log_buf;
380 dma_addr_t log_buf_dma;
381
382 u8 *smart_buf;
383 dma_addr_t smart_buf_dma;
384
348 unsigned long allocated[SLOTBITS_IN_LONGS]; 385 unsigned long allocated[SLOTBITS_IN_LONGS];
349 /* 386 /*
350 * used to queue commands when an internal command is in progress 387 * used to queue commands when an internal command is in progress
@@ -368,6 +405,7 @@ struct mtip_port {
368 * Timer used to complete commands that have been active for too long. 405 * Timer used to complete commands that have been active for too long.
369 */ 406 */
370 struct timer_list cmd_timer; 407 struct timer_list cmd_timer;
408 unsigned long ic_pause_timer;
371 /* 409 /*
372 * Semaphore used to block threads if there are no 410 * Semaphore used to block threads if there are no
373 * command slots available. 411 * command slots available.
@@ -404,13 +442,9 @@ struct driver_data {
404 442
405 unsigned slot_groups; /* number of slot groups the product supports */ 443 unsigned slot_groups; /* number of slot groups the product supports */
406 444
407 atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
408
409 unsigned long index; /* Index to determine the disk name */ 445 unsigned long index; /* Index to determine the disk name */
410 446
411 unsigned int ftlrebuildflag; /* FTL rebuild flag */ 447 unsigned long dd_flag; /* NOTE: use atomic bit operations on this */
412
413 atomic_t resumeflag; /* Atomic variable to track suspend/resume */
414 448
415 struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ 449 struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
416}; 450};
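The net effect of the header hunks above is that the driver's per-device state (cleanup done, resume in progress, remove pending, write protect, over temperature, init done) now lives in a single unsigned long dd_flag and is touched only through the atomic bit helpers, as the "NOTE: use atomic bit operations on this" comment demands. A minimal sketch of that idiom, with an illustrative structure and bit names rather than the driver's own:

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/types.h>

enum {
	EX_REMOVE_PENDING_BIT = 1,
	EX_RESUME_BIT = 6,
};

struct example_data {
	unsigned long flags;		/* NOTE: use atomic bit operations on this */
};

static void example_suspend(struct example_data *d)
{
	set_bit(EX_RESUME_BIT, &d->flags);		/* atomically set "resume pending" */
}

static int example_submit_io(struct example_data *d)
{
	if (test_bit(EX_REMOVE_PENDING_BIT, &d->flags))	/* atomic read of a single bit */
		return -ENODEV;
	return 0;
}

static void example_resume_done(struct example_data *d)
{
	clear_bit(EX_RESUME_BIT, &d->flags);		/* atomically clear the bit */
}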
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c3f0ee16594d..061427a75d37 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,12 +34,11 @@
34#include <linux/kthread.h> 34#include <linux/kthread.h>
35 35
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/types.h> 37#include <asm/types.h>
39 38
40#include <linux/nbd.h> 39#include <linux/nbd.h>
41 40
42#define LO_MAGIC 0x68797548 41#define NBD_MAGIC 0x68797548
43 42
44#ifdef NDEBUG 43#ifdef NDEBUG
45#define dprintk(flags, fmt...) 44#define dprintk(flags, fmt...)
@@ -116,7 +115,7 @@ static void nbd_end_request(struct request *req)
116 spin_unlock_irqrestore(q->queue_lock, flags); 115 spin_unlock_irqrestore(q->queue_lock, flags);
117} 116}
118 117
119static void sock_shutdown(struct nbd_device *lo, int lock) 118static void sock_shutdown(struct nbd_device *nbd, int lock)
120{ 119{
121 /* Forcibly shutdown the socket causing all listeners 120 /* Forcibly shutdown the socket causing all listeners
122 * to error 121 * to error
@@ -125,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
125 * there should be a more generic interface rather than 124 * there should be a more generic interface rather than
126 * calling socket ops directly here */ 125 * calling socket ops directly here */
127 if (lock) 126 if (lock)
128 mutex_lock(&lo->tx_lock); 127 mutex_lock(&nbd->tx_lock);
129 if (lo->sock) { 128 if (nbd->sock) {
130 dev_warn(disk_to_dev(lo->disk), "shutting down socket\n"); 129 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
131 kernel_sock_shutdown(lo->sock, SHUT_RDWR); 130 kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
132 lo->sock = NULL; 131 nbd->sock = NULL;
133 } 132 }
134 if (lock) 133 if (lock)
135 mutex_unlock(&lo->tx_lock); 134 mutex_unlock(&nbd->tx_lock);
136} 135}
137 136
138static void nbd_xmit_timeout(unsigned long arg) 137static void nbd_xmit_timeout(unsigned long arg)
@@ -147,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg)
147/* 146/*
148 * Send or receive packet. 147 * Send or receive packet.
149 */ 148 */
150static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, 149static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
151 int msg_flags) 150 int msg_flags)
152{ 151{
153 struct socket *sock = lo->sock; 152 struct socket *sock = nbd->sock;
154 int result; 153 int result;
155 struct msghdr msg; 154 struct msghdr msg;
156 struct kvec iov; 155 struct kvec iov;
157 sigset_t blocked, oldset; 156 sigset_t blocked, oldset;
158 157
159 if (unlikely(!sock)) { 158 if (unlikely(!sock)) {
160 dev_err(disk_to_dev(lo->disk), 159 dev_err(disk_to_dev(nbd->disk),
161 "Attempted %s on closed socket in sock_xmit\n", 160 "Attempted %s on closed socket in sock_xmit\n",
162 (send ? "send" : "recv")); 161 (send ? "send" : "recv"));
163 return -EINVAL; 162 return -EINVAL;
@@ -181,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
181 if (send) { 180 if (send) {
182 struct timer_list ti; 181 struct timer_list ti;
183 182
184 if (lo->xmit_timeout) { 183 if (nbd->xmit_timeout) {
185 init_timer(&ti); 184 init_timer(&ti);
186 ti.function = nbd_xmit_timeout; 185 ti.function = nbd_xmit_timeout;
187 ti.data = (unsigned long)current; 186 ti.data = (unsigned long)current;
188 ti.expires = jiffies + lo->xmit_timeout; 187 ti.expires = jiffies + nbd->xmit_timeout;
189 add_timer(&ti); 188 add_timer(&ti);
190 } 189 }
191 result = kernel_sendmsg(sock, &msg, &iov, 1, size); 190 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
192 if (lo->xmit_timeout) 191 if (nbd->xmit_timeout)
193 del_timer_sync(&ti); 192 del_timer_sync(&ti);
194 } else 193 } else
195 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 194 result = kernel_recvmsg(sock, &msg, &iov, 1, size,
@@ -201,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
201 task_pid_nr(current), current->comm, 200 task_pid_nr(current), current->comm,
202 dequeue_signal_lock(current, &current->blocked, &info)); 201 dequeue_signal_lock(current, &current->blocked, &info));
203 result = -EINTR; 202 result = -EINTR;
204 sock_shutdown(lo, !send); 203 sock_shutdown(nbd, !send);
205 break; 204 break;
206 } 205 }
207 206
@@ -219,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
219 return result; 218 return result;
220} 219}
221 220
222static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec, 221static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
223 int flags) 222 int flags)
224{ 223{
225 int result; 224 int result;
226 void *kaddr = kmap(bvec->bv_page); 225 void *kaddr = kmap(bvec->bv_page);
227 result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); 226 result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
227 bvec->bv_len, flags);
228 kunmap(bvec->bv_page); 228 kunmap(bvec->bv_page);
229 return result; 229 return result;
230} 230}
231 231
232/* always call with the tx_lock held */ 232/* always call with the tx_lock held */
233static int nbd_send_req(struct nbd_device *lo, struct request *req) 233static int nbd_send_req(struct nbd_device *nbd, struct request *req)
234{ 234{
235 int result, flags; 235 int result, flags;
236 struct nbd_request request; 236 struct nbd_request request;
@@ -243,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
243 memcpy(request.handle, &req, sizeof(req)); 243 memcpy(request.handle, &req, sizeof(req));
244 244
245 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", 245 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
246 lo->disk->disk_name, req, 246 nbd->disk->disk_name, req,
247 nbdcmd_to_ascii(nbd_cmd(req)), 247 nbdcmd_to_ascii(nbd_cmd(req)),
248 (unsigned long long)blk_rq_pos(req) << 9, 248 (unsigned long long)blk_rq_pos(req) << 9,
249 blk_rq_bytes(req)); 249 blk_rq_bytes(req));
250 result = sock_xmit(lo, 1, &request, sizeof(request), 250 result = sock_xmit(nbd, 1, &request, sizeof(request),
251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
252 if (result <= 0) { 252 if (result <= 0) {
253 dev_err(disk_to_dev(lo->disk), 253 dev_err(disk_to_dev(nbd->disk),
254 "Send control failed (result %d)\n", result); 254 "Send control failed (result %d)\n", result);
255 goto error_out; 255 goto error_out;
256 } 256 }
@@ -267,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
267 if (!rq_iter_last(req, iter)) 267 if (!rq_iter_last(req, iter))
268 flags = MSG_MORE; 268 flags = MSG_MORE;
269 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 269 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
270 lo->disk->disk_name, req, bvec->bv_len); 270 nbd->disk->disk_name, req, bvec->bv_len);
271 result = sock_send_bvec(lo, bvec, flags); 271 result = sock_send_bvec(nbd, bvec, flags);
272 if (result <= 0) { 272 if (result <= 0) {
273 dev_err(disk_to_dev(lo->disk), 273 dev_err(disk_to_dev(nbd->disk),
274 "Send data failed (result %d)\n", 274 "Send data failed (result %d)\n",
275 result); 275 result);
276 goto error_out; 276 goto error_out;
@@ -283,25 +283,25 @@ error_out:
283 return -EIO; 283 return -EIO;
284} 284}
285 285
286static struct request *nbd_find_request(struct nbd_device *lo, 286static struct request *nbd_find_request(struct nbd_device *nbd,
287 struct request *xreq) 287 struct request *xreq)
288{ 288{
289 struct request *req, *tmp; 289 struct request *req, *tmp;
290 int err; 290 int err;
291 291
292 err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); 292 err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
293 if (unlikely(err)) 293 if (unlikely(err))
294 goto out; 294 goto out;
295 295
296 spin_lock(&lo->queue_lock); 296 spin_lock(&nbd->queue_lock);
297 list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) { 297 list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
298 if (req != xreq) 298 if (req != xreq)
299 continue; 299 continue;
300 list_del_init(&req->queuelist); 300 list_del_init(&req->queuelist);
301 spin_unlock(&lo->queue_lock); 301 spin_unlock(&nbd->queue_lock);
302 return req; 302 return req;
303 } 303 }
304 spin_unlock(&lo->queue_lock); 304 spin_unlock(&nbd->queue_lock);
305 305
306 err = -ENOENT; 306 err = -ENOENT;
307 307
@@ -309,78 +309,78 @@ out:
309 return ERR_PTR(err); 309 return ERR_PTR(err);
310} 310}
311 311
312static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec) 312static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
313{ 313{
314 int result; 314 int result;
315 void *kaddr = kmap(bvec->bv_page); 315 void *kaddr = kmap(bvec->bv_page);
316 result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len, 316 result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
317 MSG_WAITALL); 317 MSG_WAITALL);
318 kunmap(bvec->bv_page); 318 kunmap(bvec->bv_page);
319 return result; 319 return result;
320} 320}
321 321
322/* NULL returned = something went wrong, inform userspace */ 322/* NULL returned = something went wrong, inform userspace */
323static struct request *nbd_read_stat(struct nbd_device *lo) 323static struct request *nbd_read_stat(struct nbd_device *nbd)
324{ 324{
325 int result; 325 int result;
326 struct nbd_reply reply; 326 struct nbd_reply reply;
327 struct request *req; 327 struct request *req;
328 328
329 reply.magic = 0; 329 reply.magic = 0;
330 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); 330 result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
331 if (result <= 0) { 331 if (result <= 0) {
332 dev_err(disk_to_dev(lo->disk), 332 dev_err(disk_to_dev(nbd->disk),
333 "Receive control failed (result %d)\n", result); 333 "Receive control failed (result %d)\n", result);
334 goto harderror; 334 goto harderror;
335 } 335 }
336 336
337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
338 dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n", 338 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
339 (unsigned long)ntohl(reply.magic)); 339 (unsigned long)ntohl(reply.magic));
340 result = -EPROTO; 340 result = -EPROTO;
341 goto harderror; 341 goto harderror;
342 } 342 }
343 343
344 req = nbd_find_request(lo, *(struct request **)reply.handle); 344 req = nbd_find_request(nbd, *(struct request **)reply.handle);
345 if (IS_ERR(req)) { 345 if (IS_ERR(req)) {
346 result = PTR_ERR(req); 346 result = PTR_ERR(req);
347 if (result != -ENOENT) 347 if (result != -ENOENT)
348 goto harderror; 348 goto harderror;
349 349
350 dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n", 350 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
351 reply.handle); 351 reply.handle);
352 result = -EBADR; 352 result = -EBADR;
353 goto harderror; 353 goto harderror;
354 } 354 }
355 355
356 if (ntohl(reply.error)) { 356 if (ntohl(reply.error)) {
357 dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n", 357 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
358 ntohl(reply.error)); 358 ntohl(reply.error));
359 req->errors++; 359 req->errors++;
360 return req; 360 return req;
361 } 361 }
362 362
363 dprintk(DBG_RX, "%s: request %p: got reply\n", 363 dprintk(DBG_RX, "%s: request %p: got reply\n",
364 lo->disk->disk_name, req); 364 nbd->disk->disk_name, req);
365 if (nbd_cmd(req) == NBD_CMD_READ) { 365 if (nbd_cmd(req) == NBD_CMD_READ) {
366 struct req_iterator iter; 366 struct req_iterator iter;
367 struct bio_vec *bvec; 367 struct bio_vec *bvec;
368 368
369 rq_for_each_segment(bvec, req, iter) { 369 rq_for_each_segment(bvec, req, iter) {
370 result = sock_recv_bvec(lo, bvec); 370 result = sock_recv_bvec(nbd, bvec);
371 if (result <= 0) { 371 if (result <= 0) {
372 dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n", 372 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
373 result); 373 result);
374 req->errors++; 374 req->errors++;
375 return req; 375 return req;
376 } 376 }
377 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", 377 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
378 lo->disk->disk_name, req, bvec->bv_len); 378 nbd->disk->disk_name, req, bvec->bv_len);
379 } 379 }
380 } 380 }
381 return req; 381 return req;
382harderror: 382harderror:
383 lo->harderror = result; 383 nbd->harderror = result;
384 return NULL; 384 return NULL;
385} 385}
386 386
@@ -398,48 +398,48 @@ static struct device_attribute pid_attr = {
398 .show = pid_show, 398 .show = pid_show,
399}; 399};
400 400
401static int nbd_do_it(struct nbd_device *lo) 401static int nbd_do_it(struct nbd_device *nbd)
402{ 402{
403 struct request *req; 403 struct request *req;
404 int ret; 404 int ret;
405 405
406 BUG_ON(lo->magic != LO_MAGIC); 406 BUG_ON(nbd->magic != NBD_MAGIC);
407 407
408 lo->pid = task_pid_nr(current); 408 nbd->pid = task_pid_nr(current);
409 ret = device_create_file(disk_to_dev(lo->disk), &pid_attr); 409 ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
410 if (ret) { 410 if (ret) {
411 dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n"); 411 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
412 lo->pid = 0; 412 nbd->pid = 0;
413 return ret; 413 return ret;
414 } 414 }
415 415
416 while ((req = nbd_read_stat(lo)) != NULL) 416 while ((req = nbd_read_stat(nbd)) != NULL)
417 nbd_end_request(req); 417 nbd_end_request(req);
418 418
419 device_remove_file(disk_to_dev(lo->disk), &pid_attr); 419 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
420 lo->pid = 0; 420 nbd->pid = 0;
421 return 0; 421 return 0;
422} 422}
423 423
424static void nbd_clear_que(struct nbd_device *lo) 424static void nbd_clear_que(struct nbd_device *nbd)
425{ 425{
426 struct request *req; 426 struct request *req;
427 427
428 BUG_ON(lo->magic != LO_MAGIC); 428 BUG_ON(nbd->magic != NBD_MAGIC);
429 429
430 /* 430 /*
431 * Because we have set lo->sock to NULL under the tx_lock, all 431 * Because we have set nbd->sock to NULL under the tx_lock, all
432 * modifications to the list must have completed by now. For 432 * modifications to the list must have completed by now. For
433 * the same reason, the active_req must be NULL. 433 * the same reason, the active_req must be NULL.
434 * 434 *
435 * As a consequence, we don't need to take the spin lock while 435 * As a consequence, we don't need to take the spin lock while
436 * purging the list here. 436 * purging the list here.
437 */ 437 */
438 BUG_ON(lo->sock); 438 BUG_ON(nbd->sock);
439 BUG_ON(lo->active_req); 439 BUG_ON(nbd->active_req);
440 440
441 while (!list_empty(&lo->queue_head)) { 441 while (!list_empty(&nbd->queue_head)) {
442 req = list_entry(lo->queue_head.next, struct request, 442 req = list_entry(nbd->queue_head.next, struct request,
443 queuelist); 443 queuelist);
444 list_del_init(&req->queuelist); 444 list_del_init(&req->queuelist);
445 req->errors++; 445 req->errors++;
@@ -448,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo)
448} 448}
449 449
450 450
451static void nbd_handle_req(struct nbd_device *lo, struct request *req) 451static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
452{ 452{
453 if (req->cmd_type != REQ_TYPE_FS) 453 if (req->cmd_type != REQ_TYPE_FS)
454 goto error_out; 454 goto error_out;
@@ -456,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
456 nbd_cmd(req) = NBD_CMD_READ; 456 nbd_cmd(req) = NBD_CMD_READ;
457 if (rq_data_dir(req) == WRITE) { 457 if (rq_data_dir(req) == WRITE) {
458 nbd_cmd(req) = NBD_CMD_WRITE; 458 nbd_cmd(req) = NBD_CMD_WRITE;
459 if (lo->flags & NBD_READ_ONLY) { 459 if (nbd->flags & NBD_READ_ONLY) {
460 dev_err(disk_to_dev(lo->disk), 460 dev_err(disk_to_dev(nbd->disk),
461 "Write on read-only\n"); 461 "Write on read-only\n");
462 goto error_out; 462 goto error_out;
463 } 463 }
@@ -465,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
465 465
466 req->errors = 0; 466 req->errors = 0;
467 467
468 mutex_lock(&lo->tx_lock); 468 mutex_lock(&nbd->tx_lock);
469 if (unlikely(!lo->sock)) { 469 if (unlikely(!nbd->sock)) {
470 mutex_unlock(&lo->tx_lock); 470 mutex_unlock(&nbd->tx_lock);
471 dev_err(disk_to_dev(lo->disk), 471 dev_err(disk_to_dev(nbd->disk),
472 "Attempted send on closed socket\n"); 472 "Attempted send on closed socket\n");
473 goto error_out; 473 goto error_out;
474 } 474 }
475 475
476 lo->active_req = req; 476 nbd->active_req = req;
477 477
478 if (nbd_send_req(lo, req) != 0) { 478 if (nbd_send_req(nbd, req) != 0) {
479 dev_err(disk_to_dev(lo->disk), "Request send failed\n"); 479 dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
480 req->errors++; 480 req->errors++;
481 nbd_end_request(req); 481 nbd_end_request(req);
482 } else { 482 } else {
483 spin_lock(&lo->queue_lock); 483 spin_lock(&nbd->queue_lock);
484 list_add(&req->queuelist, &lo->queue_head); 484 list_add(&req->queuelist, &nbd->queue_head);
485 spin_unlock(&lo->queue_lock); 485 spin_unlock(&nbd->queue_lock);
486 } 486 }
487 487
488 lo->active_req = NULL; 488 nbd->active_req = NULL;
489 mutex_unlock(&lo->tx_lock); 489 mutex_unlock(&nbd->tx_lock);
490 wake_up_all(&lo->active_wq); 490 wake_up_all(&nbd->active_wq);
491 491
492 return; 492 return;
493 493
@@ -498,28 +498,28 @@ error_out:
498 498
499static int nbd_thread(void *data) 499static int nbd_thread(void *data)
500{ 500{
501 struct nbd_device *lo = data; 501 struct nbd_device *nbd = data;
502 struct request *req; 502 struct request *req;
503 503
504 set_user_nice(current, -20); 504 set_user_nice(current, -20);
505 while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { 505 while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
506 /* wait for something to do */ 506 /* wait for something to do */
507 wait_event_interruptible(lo->waiting_wq, 507 wait_event_interruptible(nbd->waiting_wq,
508 kthread_should_stop() || 508 kthread_should_stop() ||
509 !list_empty(&lo->waiting_queue)); 509 !list_empty(&nbd->waiting_queue));
510 510
511 /* extract request */ 511 /* extract request */
512 if (list_empty(&lo->waiting_queue)) 512 if (list_empty(&nbd->waiting_queue))
513 continue; 513 continue;
514 514
515 spin_lock_irq(&lo->queue_lock); 515 spin_lock_irq(&nbd->queue_lock);
516 req = list_entry(lo->waiting_queue.next, struct request, 516 req = list_entry(nbd->waiting_queue.next, struct request,
517 queuelist); 517 queuelist);
518 list_del_init(&req->queuelist); 518 list_del_init(&req->queuelist);
519 spin_unlock_irq(&lo->queue_lock); 519 spin_unlock_irq(&nbd->queue_lock);
520 520
521 /* handle request */ 521 /* handle request */
522 nbd_handle_req(lo, req); 522 nbd_handle_req(nbd, req);
523 } 523 }
524 return 0; 524 return 0;
525} 525}
@@ -527,7 +527,7 @@ static int nbd_thread(void *data)
527/* 527/*
528 * We always wait for result of write, for now. It would be nice to make it optional 528 * We always wait for result of write, for now. It would be nice to make it optional
529 * in future 529 * in future
530 * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) 530 * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
531 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } 531 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
532 */ 532 */
533 533
@@ -536,19 +536,19 @@ static void do_nbd_request(struct request_queue *q)
536 struct request *req; 536 struct request *req;
537 537
538 while ((req = blk_fetch_request(q)) != NULL) { 538 while ((req = blk_fetch_request(q)) != NULL) {
539 struct nbd_device *lo; 539 struct nbd_device *nbd;
540 540
541 spin_unlock_irq(q->queue_lock); 541 spin_unlock_irq(q->queue_lock);
542 542
543 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", 543 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
544 req->rq_disk->disk_name, req, req->cmd_type); 544 req->rq_disk->disk_name, req, req->cmd_type);
545 545
546 lo = req->rq_disk->private_data; 546 nbd = req->rq_disk->private_data;
547 547
548 BUG_ON(lo->magic != LO_MAGIC); 548 BUG_ON(nbd->magic != NBD_MAGIC);
549 549
550 if (unlikely(!lo->sock)) { 550 if (unlikely(!nbd->sock)) {
551 dev_err(disk_to_dev(lo->disk), 551 dev_err(disk_to_dev(nbd->disk),
552 "Attempted send on closed socket\n"); 552 "Attempted send on closed socket\n");
553 req->errors++; 553 req->errors++;
554 nbd_end_request(req); 554 nbd_end_request(req);
@@ -556,11 +556,11 @@ static void do_nbd_request(struct request_queue *q)
556 continue; 556 continue;
557 } 557 }
558 558
559 spin_lock_irq(&lo->queue_lock); 559 spin_lock_irq(&nbd->queue_lock);
560 list_add_tail(&req->queuelist, &lo->waiting_queue); 560 list_add_tail(&req->queuelist, &nbd->waiting_queue);
561 spin_unlock_irq(&lo->queue_lock); 561 spin_unlock_irq(&nbd->queue_lock);
562 562
563 wake_up(&lo->waiting_wq); 563 wake_up(&nbd->waiting_wq);
564 564
565 spin_lock_irq(q->queue_lock); 565 spin_lock_irq(q->queue_lock);
566 } 566 }
@@ -568,32 +568,32 @@ static void do_nbd_request(struct request_queue *q)
568 568
569/* Must be called with tx_lock held */ 569/* Must be called with tx_lock held */
570 570
571static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, 571static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
572 unsigned int cmd, unsigned long arg) 572 unsigned int cmd, unsigned long arg)
573{ 573{
574 switch (cmd) { 574 switch (cmd) {
575 case NBD_DISCONNECT: { 575 case NBD_DISCONNECT: {
576 struct request sreq; 576 struct request sreq;
577 577
578 dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n"); 578 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
579 579
580 blk_rq_init(NULL, &sreq); 580 blk_rq_init(NULL, &sreq);
581 sreq.cmd_type = REQ_TYPE_SPECIAL; 581 sreq.cmd_type = REQ_TYPE_SPECIAL;
582 nbd_cmd(&sreq) = NBD_CMD_DISC; 582 nbd_cmd(&sreq) = NBD_CMD_DISC;
583 if (!lo->sock) 583 if (!nbd->sock)
584 return -EINVAL; 584 return -EINVAL;
585 nbd_send_req(lo, &sreq); 585 nbd_send_req(nbd, &sreq);
586 return 0; 586 return 0;
587 } 587 }
588 588
589 case NBD_CLEAR_SOCK: { 589 case NBD_CLEAR_SOCK: {
590 struct file *file; 590 struct file *file;
591 591
592 lo->sock = NULL; 592 nbd->sock = NULL;
593 file = lo->file; 593 file = nbd->file;
594 lo->file = NULL; 594 nbd->file = NULL;
595 nbd_clear_que(lo); 595 nbd_clear_que(nbd);
596 BUG_ON(!list_empty(&lo->queue_head)); 596 BUG_ON(!list_empty(&nbd->queue_head));
597 if (file) 597 if (file)
598 fput(file); 598 fput(file);
599 return 0; 599 return 0;
@@ -601,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
601 601
602 case NBD_SET_SOCK: { 602 case NBD_SET_SOCK: {
603 struct file *file; 603 struct file *file;
604 if (lo->file) 604 if (nbd->file)
605 return -EBUSY; 605 return -EBUSY;
606 file = fget(arg); 606 file = fget(arg);
607 if (file) { 607 if (file) {
608 struct inode *inode = file->f_path.dentry->d_inode; 608 struct inode *inode = file->f_path.dentry->d_inode;
609 if (S_ISSOCK(inode->i_mode)) { 609 if (S_ISSOCK(inode->i_mode)) {
610 lo->file = file; 610 nbd->file = file;
611 lo->sock = SOCKET_I(inode); 611 nbd->sock = SOCKET_I(inode);
612 if (max_part > 0) 612 if (max_part > 0)
613 bdev->bd_invalidated = 1; 613 bdev->bd_invalidated = 1;
614 return 0; 614 return 0;
@@ -620,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
620 } 620 }
621 621
622 case NBD_SET_BLKSIZE: 622 case NBD_SET_BLKSIZE:
623 lo->blksize = arg; 623 nbd->blksize = arg;
624 lo->bytesize &= ~(lo->blksize-1); 624 nbd->bytesize &= ~(nbd->blksize-1);
625 bdev->bd_inode->i_size = lo->bytesize; 625 bdev->bd_inode->i_size = nbd->bytesize;
626 set_blocksize(bdev, lo->blksize); 626 set_blocksize(bdev, nbd->blksize);
627 set_capacity(lo->disk, lo->bytesize >> 9); 627 set_capacity(nbd->disk, nbd->bytesize >> 9);
628 return 0; 628 return 0;
629 629
630 case NBD_SET_SIZE: 630 case NBD_SET_SIZE:
631 lo->bytesize = arg & ~(lo->blksize-1); 631 nbd->bytesize = arg & ~(nbd->blksize-1);
632 bdev->bd_inode->i_size = lo->bytesize; 632 bdev->bd_inode->i_size = nbd->bytesize;
633 set_blocksize(bdev, lo->blksize); 633 set_blocksize(bdev, nbd->blksize);
634 set_capacity(lo->disk, lo->bytesize >> 9); 634 set_capacity(nbd->disk, nbd->bytesize >> 9);
635 return 0; 635 return 0;
636 636
637 case NBD_SET_TIMEOUT: 637 case NBD_SET_TIMEOUT:
638 lo->xmit_timeout = arg * HZ; 638 nbd->xmit_timeout = arg * HZ;
639 return 0; 639 return 0;
640 640
641 case NBD_SET_SIZE_BLOCKS: 641 case NBD_SET_SIZE_BLOCKS:
642 lo->bytesize = ((u64) arg) * lo->blksize; 642 nbd->bytesize = ((u64) arg) * nbd->blksize;
643 bdev->bd_inode->i_size = lo->bytesize; 643 bdev->bd_inode->i_size = nbd->bytesize;
644 set_blocksize(bdev, lo->blksize); 644 set_blocksize(bdev, nbd->blksize);
645 set_capacity(lo->disk, lo->bytesize >> 9); 645 set_capacity(nbd->disk, nbd->bytesize >> 9);
646 return 0; 646 return 0;
647 647
648 case NBD_DO_IT: { 648 case NBD_DO_IT: {
@@ -650,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
650 struct file *file; 650 struct file *file;
651 int error; 651 int error;
652 652
653 if (lo->pid) 653 if (nbd->pid)
654 return -EBUSY; 654 return -EBUSY;
655 if (!lo->file) 655 if (!nbd->file)
656 return -EINVAL; 656 return -EINVAL;
657 657
658 mutex_unlock(&lo->tx_lock); 658 mutex_unlock(&nbd->tx_lock);
659 659
660 thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); 660 thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
661 if (IS_ERR(thread)) { 661 if (IS_ERR(thread)) {
662 mutex_lock(&lo->tx_lock); 662 mutex_lock(&nbd->tx_lock);
663 return PTR_ERR(thread); 663 return PTR_ERR(thread);
664 } 664 }
665 wake_up_process(thread); 665 wake_up_process(thread);
666 error = nbd_do_it(lo); 666 error = nbd_do_it(nbd);
667 kthread_stop(thread); 667 kthread_stop(thread);
668 668
669 mutex_lock(&lo->tx_lock); 669 mutex_lock(&nbd->tx_lock);
670 if (error) 670 if (error)
671 return error; 671 return error;
672 sock_shutdown(lo, 0); 672 sock_shutdown(nbd, 0);
673 file = lo->file; 673 file = nbd->file;
674 lo->file = NULL; 674 nbd->file = NULL;
675 nbd_clear_que(lo); 675 nbd_clear_que(nbd);
676 dev_warn(disk_to_dev(lo->disk), "queue cleared\n"); 676 dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
677 if (file) 677 if (file)
678 fput(file); 678 fput(file);
679 lo->bytesize = 0; 679 nbd->bytesize = 0;
680 bdev->bd_inode->i_size = 0; 680 bdev->bd_inode->i_size = 0;
681 set_capacity(lo->disk, 0); 681 set_capacity(nbd->disk, 0);
682 if (max_part > 0) 682 if (max_part > 0)
683 ioctl_by_bdev(bdev, BLKRRPART, 0); 683 ioctl_by_bdev(bdev, BLKRRPART, 0);
684 return lo->harderror; 684 return nbd->harderror;
685 } 685 }
686 686
687 case NBD_CLEAR_QUE: 687 case NBD_CLEAR_QUE:
@@ -689,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
689 * This is for compatibility only. The queue is always cleared 689 * This is for compatibility only. The queue is always cleared
690 * by NBD_DO_IT or NBD_CLEAR_SOCK. 690 * by NBD_DO_IT or NBD_CLEAR_SOCK.
691 */ 691 */
692 BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); 692 BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
693 return 0; 693 return 0;
694 694
695 case NBD_PRINT_DEBUG: 695 case NBD_PRINT_DEBUG:
696 dev_info(disk_to_dev(lo->disk), 696 dev_info(disk_to_dev(nbd->disk),
697 "next = %p, prev = %p, head = %p\n", 697 "next = %p, prev = %p, head = %p\n",
698 lo->queue_head.next, lo->queue_head.prev, 698 nbd->queue_head.next, nbd->queue_head.prev,
699 &lo->queue_head); 699 &nbd->queue_head);
700 return 0; 700 return 0;
701 } 701 }
702 return -ENOTTY; 702 return -ENOTTY;
@@ -705,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
705static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 705static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
706 unsigned int cmd, unsigned long arg) 706 unsigned int cmd, unsigned long arg)
707{ 707{
708 struct nbd_device *lo = bdev->bd_disk->private_data; 708 struct nbd_device *nbd = bdev->bd_disk->private_data;
709 int error; 709 int error;
710 710
711 if (!capable(CAP_SYS_ADMIN)) 711 if (!capable(CAP_SYS_ADMIN))
712 return -EPERM; 712 return -EPERM;
713 713
714 BUG_ON(lo->magic != LO_MAGIC); 714 BUG_ON(nbd->magic != NBD_MAGIC);
715 715
716 /* Anyone capable of this syscall can do *real bad* things */ 716 /* Anyone capable of this syscall can do *real bad* things */
717 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", 717 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
718 lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); 718 nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
719 719
720 mutex_lock(&lo->tx_lock); 720 mutex_lock(&nbd->tx_lock);
721 error = __nbd_ioctl(bdev, lo, cmd, arg); 721 error = __nbd_ioctl(bdev, nbd, cmd, arg);
722 mutex_unlock(&lo->tx_lock); 722 mutex_unlock(&nbd->tx_lock);
723 723
724 return error; 724 return error;
725} 725}
@@ -805,7 +805,7 @@ static int __init nbd_init(void)
805 for (i = 0; i < nbds_max; i++) { 805 for (i = 0; i < nbds_max; i++) {
806 struct gendisk *disk = nbd_dev[i].disk; 806 struct gendisk *disk = nbd_dev[i].disk;
807 nbd_dev[i].file = NULL; 807 nbd_dev[i].file = NULL;
808 nbd_dev[i].magic = LO_MAGIC; 808 nbd_dev[i].magic = NBD_MAGIC;
809 nbd_dev[i].flags = 0; 809 nbd_dev[i].flags = 0;
810 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); 810 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
811 spin_lock_init(&nbd_dev[i].queue_lock); 811 spin_lock_init(&nbd_dev[i].queue_lock);
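Apart from the wholesale lo -> nbd rename, the ioctl hunks above keep the driver's locking convention intact: __nbd_ioctl() is documented as "must be called with tx_lock held", and nbd_ioctl() is the thin wrapper that takes and releases that mutex around it. A condensed sketch of the same wrapper pattern (simplified, with illustrative names and without the full command set):

#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/errno.h>

struct example_dev {
	struct mutex tx_lock;	/* serializes transmit-side state changes */
	/* ... socket, queue head, sizes ... */
};

/* Must be called with ex->tx_lock held. */
static int __example_ioctl(struct example_dev *ex, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	/* per-command handling goes here; each body may assume tx_lock is held */
	}
	return -ENOTTY;
}

static int example_ioctl(struct example_dev *ex, unsigned int cmd, unsigned long arg)
{
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&ex->tx_lock);	/* lock once here instead of in every command body */
	error = __example_ioctl(ex, cmd, arg);
	mutex_unlock(&ex->tx_lock);

	return error;
}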
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 1f3c1a7d132a..38a2d0631882 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -39,7 +39,6 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/types.h> 41#include <linux/types.h>
42#include <linux/version.h>
43 42
44#include <asm-generic/io-64-nonatomic-lo-hi.h> 43#include <asm-generic/io-64-nonatomic-lo-hi.h>
45 44
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d59edeabd93f..ba66e4445f41 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag
987 987
988 while (copy_size > 0) { 988 while (copy_size > 0) {
989 struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); 989 struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
990 void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + 990 void *vfrom = kmap_atomic(src_bvl->bv_page) +
991 src_bvl->bv_offset + offs; 991 src_bvl->bv_offset + offs;
992 void *vto = page_address(dst_page) + dst_offs; 992 void *vto = page_address(dst_page) + dst_offs;
993 int len = min_t(int, copy_size, src_bvl->bv_len - offs); 993 int len = min_t(int, copy_size, src_bvl->bv_len - offs);
994 994
995 BUG_ON(len < 0); 995 BUG_ON(len < 0);
996 memcpy(vto, vfrom, len); 996 memcpy(vto, vfrom, len);
997 kunmap_atomic(vfrom, KM_USER0); 997 kunmap_atomic(vfrom);
998 998
999 seg++; 999 seg++;
1000 offs = 0; 1000 offs = 0;
@@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec)
1019 offs = 0; 1019 offs = 0;
1020 for (f = 0; f < pkt->frames; f++) { 1020 for (f = 0; f < pkt->frames; f++) {
1021 if (bvec[f].bv_page != pkt->pages[p]) { 1021 if (bvec[f].bv_page != pkt->pages[p]) {
1022 void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset; 1022 void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset;
1023 void *vto = page_address(pkt->pages[p]) + offs; 1023 void *vto = page_address(pkt->pages[p]) + offs;
1024 memcpy(vto, vfrom, CD_FRAMESIZE); 1024 memcpy(vto, vfrom, CD_FRAMESIZE);
1025 kunmap_atomic(vfrom, KM_USER0); 1025 kunmap_atomic(vfrom);
1026 bvec[f].bv_page = pkt->pages[p]; 1026 bvec[f].bv_page = pkt->pages[p];
1027 bvec[f].bv_offset = offs; 1027 bvec[f].bv_offset = offs;
1028 } else { 1028 } else {
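The two pktcdvd.c hunks are part of the tree-wide kmap_atomic() interface change: the per-CPU slot argument (KM_USER0) is gone, and kunmap_atomic() now takes only the mapped address. A minimal before/after sketch of the copy pattern used in those hunks, with an illustrative helper name:

#include <linux/highmem.h>
#include <linux/string.h>

/*
 * Old style (what the removed lines did):
 *	vfrom = kmap_atomic(page, KM_USER0);
 *	...
 *	kunmap_atomic(vfrom, KM_USER0);
 */

/* New style: the atomic kmap slot is managed implicitly. */
static void example_copy_from_page(struct page *page, unsigned int offset,
				   void *dst, size_t len)
{
	void *vfrom = kmap_atomic(page);	/* short, non-sleeping mapping */

	memcpy(dst, vfrom + offset, len);
	kunmap_atomic(vfrom);			/* release before anything that might sleep */
}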
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 48e8fee9f2d4..9dcf76a10bb6 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -839,10 +839,7 @@ static struct vio_driver vdc_port_driver = {
839 .id_table = vdc_port_match, 839 .id_table = vdc_port_match,
840 .probe = vdc_port_probe, 840 .probe = vdc_port_probe,
841 .remove = vdc_port_remove, 841 .remove = vdc_port_remove,
842 .driver = { 842 .name = "vdc_port",
843 .name = "vdc_port",
844 .owner = THIS_MODULE,
845 }
846}; 843};
847 844
848static int __init vdc_init(void) 845static int __init vdc_init(void)
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 7333b9e44411..fcec0225ac76 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -119,43 +119,6 @@
119 119
120/* 120/*
121 */ 121 */
122
123/* command block wrapper */
124struct bulk_cb_wrap {
125 __le32 Signature; /* contains 'USBC' */
126 u32 Tag; /* unique per command id */
127 __le32 DataTransferLength; /* size of data */
128 u8 Flags; /* direction in bit 0 */
129 u8 Lun; /* LUN */
130 u8 Length; /* of of the CDB */
131 u8 CDB[UB_MAX_CDB_SIZE]; /* max command */
132};
133
134#define US_BULK_CB_WRAP_LEN 31
135#define US_BULK_CB_SIGN 0x43425355 /*spells out USBC */
136#define US_BULK_FLAG_IN 1
137#define US_BULK_FLAG_OUT 0
138
139/* command status wrapper */
140struct bulk_cs_wrap {
141 __le32 Signature; /* should = 'USBS' */
142 u32 Tag; /* same as original command */
143 __le32 Residue; /* amount not transferred */
144 u8 Status; /* see below */
145};
146
147#define US_BULK_CS_WRAP_LEN 13
148#define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */
149#define US_BULK_STAT_OK 0
150#define US_BULK_STAT_FAIL 1
151#define US_BULK_STAT_PHASE 2
152
153/* bulk-only class specific requests */
154#define US_BULK_RESET_REQUEST 0xff
155#define US_BULK_GET_MAX_LUN 0xfe
156
157/*
158 */
159struct ub_dev; 122struct ub_dev;
160 123
161#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ 124#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */
@@ -2477,6 +2440,8 @@ static int __init ub_init(void)
2477 int rc; 2440 int rc;
2478 int i; 2441 int i;
2479 2442
2443 pr_info("'Low Performance USB Block' driver is deprecated. "
2444 "Please switch to usb-storage\n");
2480 for (i = 0; i < UB_QLOCK_NUM; i++) 2445 for (i = 0; i < UB_QLOCK_NUM; i++)
2481 spin_lock_init(&ub_qlockv[i]); 2446 spin_lock_init(&ub_qlockv[i]);
2482 2447
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
deleted file mode 100644
index 9a5b2a2d616d..000000000000
--- a/drivers/block/viodasd.c
+++ /dev/null
@@ -1,809 +0,0 @@
1/* -*- linux-c -*-
2 * viodasd.c
3 * Authors: Dave Boutcher <boutcher@us.ibm.com>
4 * Ryan Arnold <ryanarn@us.ibm.com>
5 * Colin Devilbiss <devilbis@us.ibm.com>
6 * Stephen Rothwell
7 *
8 * (C) Copyright 2000-2004 IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
24 * This routine provides access to disk space (termed "DASD" in historical
25 * IBM terms) owned and managed by an OS/400 partition running on the
26 * same box as this Linux partition.
27 *
28 * All disk operations are performed by sending messages back and forth to
29 * the OS/400 partition.
30 */
31
32#define pr_fmt(fmt) "viod: " fmt
33
34#include <linux/major.h>
35#include <linux/fs.h>
36#include <linux/module.h>
37#include <linux/kernel.h>
38#include <linux/blkdev.h>
39#include <linux/genhd.h>
40#include <linux/hdreg.h>
41#include <linux/errno.h>
42#include <linux/init.h>
43#include <linux/string.h>
44#include <linux/mutex.h>
45#include <linux/dma-mapping.h>
46#include <linux/completion.h>
47#include <linux/device.h>
48#include <linux/scatterlist.h>
49
50#include <asm/uaccess.h>
51#include <asm/vio.h>
52#include <asm/iseries/hv_types.h>
53#include <asm/iseries/hv_lp_event.h>
54#include <asm/iseries/hv_lp_config.h>
55#include <asm/iseries/vio.h>
56#include <asm/firmware.h>
57
58MODULE_DESCRIPTION("iSeries Virtual DASD");
59MODULE_AUTHOR("Dave Boutcher");
60MODULE_LICENSE("GPL");
61
62/*
63 * We only support 7 partitions per physical disk....so with minor
64 * numbers 0-255 we get a maximum of 32 disks.
65 */
66#define VIOD_GENHD_NAME "iseries/vd"
67
68#define VIOD_VERS "1.64"
69
70enum {
71 PARTITION_SHIFT = 3,
72 MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
73 MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name)
74};
75
76static DEFINE_MUTEX(viodasd_mutex);
77static DEFINE_SPINLOCK(viodasd_spinlock);
78
79#define VIOMAXREQ 16
80
81#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0])
82
83struct viodasd_waitevent {
84 struct completion com;
85 int rc;
86 u16 sub_result;
87 int max_disk; /* open */
88};
89
90static const struct vio_error_entry viodasd_err_table[] = {
91 { 0x0201, EINVAL, "Invalid Range" },
92 { 0x0202, EINVAL, "Invalid Token" },
93 { 0x0203, EIO, "DMA Error" },
94 { 0x0204, EIO, "Use Error" },
95 { 0x0205, EIO, "Release Error" },
96 { 0x0206, EINVAL, "Invalid Disk" },
97 { 0x0207, EBUSY, "Can't Lock" },
98 { 0x0208, EIO, "Already Locked" },
99 { 0x0209, EIO, "Already Unlocked" },
100 { 0x020A, EIO, "Invalid Arg" },
101 { 0x020B, EIO, "Bad IFS File" },
102 { 0x020C, EROFS, "Read Only Device" },
103 { 0x02FF, EIO, "Internal Error" },
104 { 0x0000, 0, NULL },
105};
106
107/*
108 * Figure out the biggest I/O request (in sectors) we can accept
109 */
110#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
111
112/*
113 * Number of disk I/O requests we've sent to OS/400
114 */
115static int num_req_outstanding;
116
117/*
118 * This is our internal structure for keeping track of disk devices
119 */
120struct viodasd_device {
121 u16 cylinders;
122 u16 tracks;
123 u16 sectors;
124 u16 bytes_per_sector;
125 u64 size;
126 int read_only;
127 spinlock_t q_lock;
128 struct gendisk *disk;
129 struct device *dev;
130} viodasd_devices[MAX_DISKNO];
131
132/*
133 * External open entry point.
134 */
135static int viodasd_open(struct block_device *bdev, fmode_t mode)
136{
137 struct viodasd_device *d = bdev->bd_disk->private_data;
138 HvLpEvent_Rc hvrc;
139 struct viodasd_waitevent we;
140 u16 flags = 0;
141
142 if (d->read_only) {
143 if (mode & FMODE_WRITE)
144 return -EROFS;
145 flags = vioblockflags_ro;
146 }
147
148 init_completion(&we.com);
149
150 /* Send the open event to OS/400 */
151 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
152 HvLpEvent_Type_VirtualIo,
153 viomajorsubtype_blockio | vioblockopen,
154 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
155 viopath_sourceinst(viopath_hostLp),
156 viopath_targetinst(viopath_hostLp),
157 (u64)(unsigned long)&we, VIOVERSION << 16,
158 ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
159 0, 0, 0);
160 if (hvrc != 0) {
161 pr_warning("HV open failed %d\n", (int)hvrc);
162 return -EIO;
163 }
164
165 wait_for_completion(&we.com);
166
167 /* Check the return code */
168 if (we.rc != 0) {
169 const struct vio_error_entry *err =
170 vio_lookup_rc(viodasd_err_table, we.sub_result);
171
172 pr_warning("bad rc opening disk: %d:0x%04x (%s)\n",
173 (int)we.rc, we.sub_result, err->msg);
174 return -EIO;
175 }
176
177 return 0;
178}
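
The open path above packs both the device number and the open flags into a
single 64-bit event parameter (device number in bits 48-63, flags in bits
32-47). A minimal userspace sketch of that packing, with an assumed value for
vioblockflags_ro purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define VIOBLOCKFLAGS_RO	0x0001	/* assumed value, for illustration only */

int main(void)
{
	uint64_t dev_no = 5;
	uint64_t flags = VIOBLOCKFLAGS_RO;
	uint64_t param = (dev_no << 48) | (flags << 32);

	printf("packed parameter: 0x%016llx\n", (unsigned long long)param);
	printf("unpacked: dev_no=%llu flags=0x%llx\n",
	       (unsigned long long)(param >> 48),
	       (unsigned long long)((param >> 32) & 0xffff));
	return 0;
}
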
179
180static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
181{
182 int ret;
183
184 mutex_lock(&viodasd_mutex);
185 ret = viodasd_open(bdev, mode);
186 mutex_unlock(&viodasd_mutex);
187
188 return ret;
189}
190
191
192/*
193 * External release entry point.
194 */
195static int viodasd_release(struct gendisk *disk, fmode_t mode)
196{
197 struct viodasd_device *d = disk->private_data;
198 HvLpEvent_Rc hvrc;
199
200 mutex_lock(&viodasd_mutex);
201 /* Send the event to OS/400. We DON'T expect a response */
202 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
203 HvLpEvent_Type_VirtualIo,
204 viomajorsubtype_blockio | vioblockclose,
205 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
206 viopath_sourceinst(viopath_hostLp),
207 viopath_targetinst(viopath_hostLp),
208 0, VIOVERSION << 16,
209 ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
210 0, 0, 0);
211 if (hvrc != 0)
212 pr_warning("HV close call failed %d\n", (int)hvrc);
213
214 mutex_unlock(&viodasd_mutex);
215
216 return 0;
217}
218
219
220/* External getgeo entry point.
221 */
222static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
223{
224 struct gendisk *disk = bdev->bd_disk;
225 struct viodasd_device *d = disk->private_data;
226
227 geo->sectors = d->sectors ? d->sectors : 32;
228 geo->heads = d->tracks ? d->tracks : 64;
229 geo->cylinders = d->cylinders ? d->cylinders :
230 get_capacity(disk) / (geo->sectors * geo->heads);
231
232 return 0;
233}
234
235/*
236 * Our file operations table
237 */
238static const struct block_device_operations viodasd_fops = {
239 .owner = THIS_MODULE,
240 .open = viodasd_unlocked_open,
241 .release = viodasd_release,
242 .getgeo = viodasd_getgeo,
243};
244
245/*
246 * End a request
247 */
248static void viodasd_end_request(struct request *req, int error,
249 int num_sectors)
250{
251 __blk_end_request(req, error, num_sectors << 9);
252}
253
254/*
255 * Send an actual I/O request to OS/400
256 */
257static int send_request(struct request *req)
258{
259 u64 start;
260 int direction;
261 int nsg;
262 u16 viocmd;
263 HvLpEvent_Rc hvrc;
264 struct vioblocklpevent *bevent;
265 struct HvLpEvent *hev;
266 struct scatterlist sg[VIOMAXBLOCKDMA];
267 int sgindex;
268 struct viodasd_device *d;
269 unsigned long flags;
270
271 start = (u64)blk_rq_pos(req) << 9;
272
273 if (rq_data_dir(req) == READ) {
274 direction = DMA_FROM_DEVICE;
275 viocmd = viomajorsubtype_blockio | vioblockread;
276 } else {
277 direction = DMA_TO_DEVICE;
278 viocmd = viomajorsubtype_blockio | vioblockwrite;
279 }
280
281 d = req->rq_disk->private_data;
282
283 /* Now build the scatter-gather list */
284 sg_init_table(sg, VIOMAXBLOCKDMA);
285 nsg = blk_rq_map_sg(req->q, req, sg);
286 nsg = dma_map_sg(d->dev, sg, nsg, direction);
287
288 spin_lock_irqsave(&viodasd_spinlock, flags);
289 num_req_outstanding++;
290
291 /* This optimization handles a single DMA block */
292 if (nsg == 1)
293 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
294 HvLpEvent_Type_VirtualIo, viocmd,
295 HvLpEvent_AckInd_DoAck,
296 HvLpEvent_AckType_ImmediateAck,
297 viopath_sourceinst(viopath_hostLp),
298 viopath_targetinst(viopath_hostLp),
299 (u64)(unsigned long)req, VIOVERSION << 16,
300 ((u64)DEVICE_NO(d) << 48), start,
301 ((u64)sg_dma_address(&sg[0])) << 32,
302 sg_dma_len(&sg[0]));
303 else {
304 bevent = (struct vioblocklpevent *)
305 vio_get_event_buffer(viomajorsubtype_blockio);
306 if (bevent == NULL) {
307 pr_warning("error allocating disk event buffer\n");
308 goto error_ret;
309 }
310
311 /*
312 * Now build up the actual request. Note that we store
313 * the pointer to the request in the correlation
314 * token so we can match the response up later
315 */
316 memset(bevent, 0, sizeof(struct vioblocklpevent));
317 hev = &bevent->event;
318 hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
319 HV_LP_EVENT_INT;
320 hev->xType = HvLpEvent_Type_VirtualIo;
321 hev->xSubtype = viocmd;
322 hev->xSourceLp = HvLpConfig_getLpIndex();
323 hev->xTargetLp = viopath_hostLp;
324 hev->xSizeMinus1 =
325 offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
326 (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
327 hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
328 hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
329 hev->xCorrelationToken = (u64)req;
330 bevent->version = VIOVERSION;
331 bevent->disk = DEVICE_NO(d);
332 bevent->u.rw_data.offset = start;
333
334 /*
335 * Copy just the dma information from the sg list
336 * into the request
337 */
338 for (sgindex = 0; sgindex < nsg; sgindex++) {
339 bevent->u.rw_data.dma_info[sgindex].token =
340 sg_dma_address(&sg[sgindex]);
341 bevent->u.rw_data.dma_info[sgindex].len =
342 sg_dma_len(&sg[sgindex]);
343 }
344
345 /* Send the request */
346 hvrc = HvCallEvent_signalLpEvent(&bevent->event);
347 vio_free_event_buffer(viomajorsubtype_blockio, bevent);
348 }
349
350 if (hvrc != HvLpEvent_Rc_Good) {
351 pr_warning("error sending disk event to OS/400 (rc %d)\n",
352 (int)hvrc);
353 goto error_ret;
354 }
355 spin_unlock_irqrestore(&viodasd_spinlock, flags);
356 return 0;
357
358error_ret:
359 num_req_outstanding--;
360 spin_unlock_irqrestore(&viodasd_spinlock, flags);
361 dma_unmap_sg(d->dev, sg, nsg, direction);
362 return -1;
363}
364
365/*
366 * This is the external request processing routine
367 */
368static void do_viodasd_request(struct request_queue *q)
369{
370 struct request *req;
371
372 /*
373 * If we already have the maximum number of requests
374 * outstanding to OS/400 just bail out. We'll come
375 * back later.
376 */
377 while (num_req_outstanding < VIOMAXREQ) {
378 req = blk_fetch_request(q);
379 if (req == NULL)
380 return;
381 /* check that request contains a valid command */
382 if (req->cmd_type != REQ_TYPE_FS) {
383 viodasd_end_request(req, -EIO, blk_rq_sectors(req));
384 continue;
385 }
386 /* Try sending the request */
387 if (send_request(req) != 0)
388 viodasd_end_request(req, -EIO, blk_rq_sectors(req));
389 }
390}
391
392/*
393 * Probe a single disk and fill in the viodasd_device structure
394 * for it.
395 */
396static int probe_disk(struct viodasd_device *d)
397{
398 HvLpEvent_Rc hvrc;
399 struct viodasd_waitevent we;
400 int dev_no = DEVICE_NO(d);
401 struct gendisk *g;
402 struct request_queue *q;
403 u16 flags = 0;
404
405retry:
406 init_completion(&we.com);
407
408 /* Send the open event to OS/400 */
409 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
410 HvLpEvent_Type_VirtualIo,
411 viomajorsubtype_blockio | vioblockopen,
412 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
413 viopath_sourceinst(viopath_hostLp),
414 viopath_targetinst(viopath_hostLp),
415 (u64)(unsigned long)&we, VIOVERSION << 16,
416 ((u64)dev_no << 48) | ((u64)flags << 32),
417 0, 0, 0);
418 if (hvrc != 0) {
419 pr_warning("bad rc on HV open %d\n", (int)hvrc);
420 return 0;
421 }
422
423 wait_for_completion(&we.com);
424
425 if (we.rc != 0) {
426 if (flags != 0)
427 return 0;
428 /* try again with read only flag set */
429 flags = vioblockflags_ro;
430 goto retry;
431 }
432 if (we.max_disk > (MAX_DISKNO - 1)) {
433 printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"),
434 MAX_DISKNO, we.max_disk + 1);
435 }
436
437 /* Send the close event to OS/400. We DON'T expect a response */
438 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
439 HvLpEvent_Type_VirtualIo,
440 viomajorsubtype_blockio | vioblockclose,
441 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
442 viopath_sourceinst(viopath_hostLp),
443 viopath_targetinst(viopath_hostLp),
444 0, VIOVERSION << 16,
445 ((u64)dev_no << 48) | ((u64)flags << 32),
446 0, 0, 0);
447 if (hvrc != 0) {
448 pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc);
449 return 0;
450 }
451
452 if (d->dev == NULL) {
453 /* this is when we reprobe for new disks */
454 if (vio_create_viodasd(dev_no) == NULL) {
455 pr_warning("cannot allocate virtual device for disk %d\n",
456 dev_no);
457 return 0;
458 }
459 /*
460 * The vio_create_viodasd will have recursed into this
461 * routine with d->dev set to the new vio device and
462 * will finish the setup of the disk below.
463 */
464 return 1;
465 }
466
467 /* create the request queue for the disk */
468 spin_lock_init(&d->q_lock);
469 q = blk_init_queue(do_viodasd_request, &d->q_lock);
470 if (q == NULL) {
471 pr_warning("cannot allocate queue for disk %d\n", dev_no);
472 return 0;
473 }
474 g = alloc_disk(1 << PARTITION_SHIFT);
475 if (g == NULL) {
476 pr_warning("cannot allocate disk structure for disk %d\n",
477 dev_no);
478 blk_cleanup_queue(q);
479 return 0;
480 }
481
482 d->disk = g;
483 blk_queue_max_segments(q, VIOMAXBLOCKDMA);
484 blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
485 g->major = VIODASD_MAJOR;
486 g->first_minor = dev_no << PARTITION_SHIFT;
487 if (dev_no >= 26)
488 snprintf(g->disk_name, sizeof(g->disk_name),
489 VIOD_GENHD_NAME "%c%c",
490 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
491 else
492 snprintf(g->disk_name, sizeof(g->disk_name),
493 VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
494 g->fops = &viodasd_fops;
495 g->queue = q;
496 g->private_data = d;
497 g->driverfs_dev = d->dev;
498 set_capacity(g, d->size >> 9);
499
500 pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n",
501 dev_no, (unsigned long)(d->size >> 9),
502 (unsigned long)(d->size >> 20),
503 (int)d->cylinders, (int)d->tracks,
504 (int)d->sectors, (int)d->bytes_per_sector,
505 d->read_only ? " (RO)" : "");
506
507 /* register us in the global list */
508 add_disk(g);
509 return 1;
510}
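
The naming logic in probe_disk() gives the first 26 disks a single-letter
suffix and later disks a two-letter one. A standalone sketch of the same
snprintf() calls, runnable in userspace, showing the names that come out:

#include <stdio.h>
#include <stddef.h>

#define VIOD_GENHD_NAME "iseries/vd"

static void name_disk(int dev_no, char *buf, size_t len)
{
	if (dev_no >= 26)
		snprintf(buf, len, VIOD_GENHD_NAME "%c%c",
			 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
	else
		snprintf(buf, len, VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
}

int main(void)
{
	int samples[] = { 0, 1, 25, 26, 27, 31 };
	char name[32];
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		name_disk(samples[i], name, sizeof(name));
		printf("disk %2d -> %s\n", samples[i], name);
	}
	return 0;
}

Disk 0 becomes iseries/vda, disk 25 iseries/vdz, disk 26 iseries/vdaa and so
on, up to the 32-disk limit.
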
511
512/* returns the total number of scatterlist elements converted */
513static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
514 struct scatterlist *sg, int *total_len)
515{
516 int i, numsg;
517 const struct rw_data *rw_data = &bevent->u.rw_data;
518 static const int offset =
519 offsetof(struct vioblocklpevent, u.rw_data.dma_info);
520 static const int element_size = sizeof(rw_data->dma_info[0]);
521
522 numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
523 if (numsg > VIOMAXBLOCKDMA)
524 numsg = VIOMAXBLOCKDMA;
525
526 *total_len = 0;
527 sg_init_table(sg, VIOMAXBLOCKDMA);
528 for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
529 sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
530 sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
531 *total_len += rw_data->dma_info[i].len;
532 }
533 return i;
534}
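
block_event_to_scatterlist() recovers the number of DMA descriptors from the
event length alone: everything past the fixed header is an array of dma_info
entries, so (xSizeMinus1 + 1 - offsetof(..., dma_info)) / sizeof(dma_info[0])
gives the count. A sketch of that arithmetic using mock structures (the real
vioblocklpevent layout lives in the iSeries headers and is only imitated
here):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct mock_dma_info {
	uint64_t token;
	uint32_t len;
};

struct mock_event {
	char header[32];			/* stands in for the LP event header */
	struct mock_dma_info dma_info[12];
};

int main(void)
{
	/* Pretend the sender filled in 3 descriptors. */
	size_t size = offsetof(struct mock_event, dma_info) +
		      3 * sizeof(struct mock_dma_info);
	uint16_t xSizeMinus1 = size - 1;

	size_t numsg = ((xSizeMinus1 + 1) - offsetof(struct mock_event, dma_info)) /
		       sizeof(struct mock_dma_info);

	printf("event size %zu -> %zu scatter/gather entries\n", size, numsg);
	return 0;
}
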
535
536/*
537 * Restart all queues, starting with the one _after_ the disk given,
538 * thus reducing the chance of starvation of higher numbered disks.
539 */
540static void viodasd_restart_all_queues_starting_from(int first_index)
541{
542 int i;
543
544 for (i = first_index + 1; i < MAX_DISKNO; ++i)
545 if (viodasd_devices[i].disk)
546 blk_run_queue(viodasd_devices[i].disk->queue);
547 for (i = 0; i <= first_index; ++i)
548 if (viodasd_devices[i].disk)
549 blk_run_queue(viodasd_devices[i].disk->queue);
550}
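
The restart order above is deliberately round-robin: the scan begins at the
disk after the one whose request just completed and wraps around, so a busy
low-numbered disk cannot permanently starve the higher-numbered ones. A tiny
sketch of the visiting order (MAX_DISKNO shrunk for illustration):

#include <stdio.h>

#define MAX_DISKNO 8	/* illustration only; the driver uses the HV maximum */

int main(void)
{
	int first_index = 3;	/* disk whose request just completed */
	int i;

	printf("restart order after disk %d:", first_index);
	for (i = first_index + 1; i < MAX_DISKNO; ++i)
		printf(" %d", i);
	for (i = 0; i <= first_index; ++i)
		printf(" %d", i);
	printf("\n");
	return 0;
}
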
551
552/*
553 * For read and write requests, decrement the number of outstanding requests
554 * and free the DMA buffers we allocated.
555 */
556static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
557{
558 int num_sg, num_sect, pci_direction, total_len;
559 struct request *req;
560 struct scatterlist sg[VIOMAXBLOCKDMA];
561 struct HvLpEvent *event = &bevent->event;
562 unsigned long irq_flags;
563 struct viodasd_device *d;
564 int error;
565 spinlock_t *qlock;
566
567 num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
568 num_sect = total_len >> 9;
569 if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
570 pci_direction = DMA_FROM_DEVICE;
571 else
572 pci_direction = DMA_TO_DEVICE;
573 req = (struct request *)bevent->event.xCorrelationToken;
574 d = req->rq_disk->private_data;
575
576 dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
577
578 /*
579 * Since this is running in interrupt mode, we need to make sure
580 * we're not stepping on any global I/O operations
581 */
582 spin_lock_irqsave(&viodasd_spinlock, irq_flags);
583 num_req_outstanding--;
584 spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
585
586 error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO;
587 if (error) {
588 const struct vio_error_entry *err;
589 err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
590 pr_warning("read/write error %d:0x%04x (%s)\n",
591 event->xRc, bevent->sub_result, err->msg);
592 num_sect = blk_rq_sectors(req);
593 }
594 qlock = req->q->queue_lock;
595 spin_lock_irqsave(qlock, irq_flags);
596 viodasd_end_request(req, error, num_sect);
597 spin_unlock_irqrestore(qlock, irq_flags);
598
599 /* Finally, restart the device queues to get more requests going */
600 viodasd_restart_all_queues_starting_from(DEVICE_NO(d));
601
602 return 0;
603}
604
605/* This routine handles incoming block LP events */
606static void handle_block_event(struct HvLpEvent *event)
607{
608 struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
609 struct viodasd_waitevent *pwe;
610
611 if (event == NULL)
612 /* Notification that a partition went away! */
613 return;
614 /* First, we should NEVER get an int here...only acks */
615 if (hvlpevent_is_int(event)) {
616 pr_warning("Yikes! got an int in viodasd event handler!\n");
617 if (hvlpevent_need_ack(event)) {
618 event->xRc = HvLpEvent_Rc_InvalidSubtype;
619 HvCallEvent_ackLpEvent(event);
620 }
621 }
622
623 switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
624 case vioblockopen:
625 /*
626 * Handle a response to an open request. We get all the
627 * disk information in the response, so update it. The
628 * correlation token contains a pointer to a waitevent
629 * structure that has a completion in it. Update the
630 * return code in the waitevent structure and post the
631 * completion to wake up the caller that sent the request.
632 */
633 pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
634 pwe->rc = event->xRc;
635 pwe->sub_result = bevent->sub_result;
636 if (event->xRc == HvLpEvent_Rc_Good) {
637 const struct open_data *data = &bevent->u.open_data;
638 struct viodasd_device *device =
639 &viodasd_devices[bevent->disk];
640 device->read_only =
641 bevent->flags & vioblockflags_ro;
642 device->size = data->disk_size;
643 device->cylinders = data->cylinders;
644 device->tracks = data->tracks;
645 device->sectors = data->sectors;
646 device->bytes_per_sector = data->bytes_per_sector;
647 pwe->max_disk = data->max_disk;
648 }
649 complete(&pwe->com);
650 break;
651 case vioblockclose:
652 break;
653 case vioblockread:
654 case vioblockwrite:
655 viodasd_handle_read_write(bevent);
656 break;
657
658 default:
659 pr_warning("invalid subtype!\n");
660 if (hvlpevent_need_ack(event)) {
661 event->xRc = HvLpEvent_Rc_InvalidSubtype;
662 HvCallEvent_ackLpEvent(event);
663 }
664 }
665}
666
667/*
668 * Get the driver to reprobe for more disks.
669 */
670static ssize_t probe_disks(struct device_driver *drv, const char *buf,
671 size_t count)
672{
673 struct viodasd_device *d;
674
675 for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
676 if (d->disk == NULL)
677 probe_disk(d);
678 }
679 return count;
680}
681static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
682
683static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
684{
685 struct viodasd_device *d = &viodasd_devices[vdev->unit_address];
686
687 d->dev = &vdev->dev;
688 if (!probe_disk(d))
689 return -ENODEV;
690 return 0;
691}
692
693static int viodasd_remove(struct vio_dev *vdev)
694{
695 struct viodasd_device *d;
696
697 d = &viodasd_devices[vdev->unit_address];
698 if (d->disk) {
699 del_gendisk(d->disk);
700 blk_cleanup_queue(d->disk->queue);
701 put_disk(d->disk);
702 d->disk = NULL;
703 }
704 d->dev = NULL;
705 return 0;
706}
707
708/**
709 * viodasd_device_table: Used by vio.c to match devices that we
710 * support.
711 */
712static struct vio_device_id viodasd_device_table[] __devinitdata = {
713 { "block", "IBM,iSeries-viodasd" },
714 { "", "" }
715};
716MODULE_DEVICE_TABLE(vio, viodasd_device_table);
717
718static struct vio_driver viodasd_driver = {
719 .id_table = viodasd_device_table,
720 .probe = viodasd_probe,
721 .remove = viodasd_remove,
722 .driver = {
723 .name = "viodasd",
724 .owner = THIS_MODULE,
725 }
726};
727
728static int need_delete_probe;
729
730/*
731 * Initialize the whole device driver. Handle module and non-module
732 * versions
733 */
734static int __init viodasd_init(void)
735{
736 int rc;
737
738 if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
739 rc = -ENODEV;
740 goto early_fail;
741 }
742
743 /* Try to open to our host lp */
744 if (viopath_hostLp == HvLpIndexInvalid)
745 vio_set_hostlp();
746
747 if (viopath_hostLp == HvLpIndexInvalid) {
748 pr_warning("invalid hosting partition\n");
749 rc = -EIO;
750 goto early_fail;
751 }
752
753 pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp);
754
755 /* register the block device */
756 rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
757 if (rc) {
758 pr_warning("Unable to get major number %d for %s\n",
759 VIODASD_MAJOR, VIOD_GENHD_NAME);
760 goto early_fail;
761 }
762 /* Actually open the path to the hosting partition */
763 rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
764 VIOMAXREQ + 2);
765 if (rc) {
766 pr_warning("error opening path to host partition %d\n",
767 viopath_hostLp);
768 goto unregister_blk;
769 }
770
771 /* Initialize our request handler */
772 vio_setHandler(viomajorsubtype_blockio, handle_block_event);
773
774 rc = vio_register_driver(&viodasd_driver);
775 if (rc) {
776 pr_warning("vio_register_driver failed\n");
777 goto unset_handler;
778 }
779
780 /*
781 * If this call fails, it just means that we cannot dynamically
782 * add virtual disks, but the driver will still work fine for
783 * all existing disks, so ignore the failure.
784 */
785 if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe))
786 need_delete_probe = 1;
787
788 return 0;
789
790unset_handler:
791 vio_clearHandler(viomajorsubtype_blockio);
792 viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
793unregister_blk:
794 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
795early_fail:
796 return rc;
797}
798module_init(viodasd_init);
799
800void __exit viodasd_exit(void)
801{
802 if (need_delete_probe)
803 driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
804 vio_unregister_driver(&viodasd_driver);
805 vio_clearHandler(viomajorsubtype_blockio);
806 viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
807 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
808}
809module_exit(viodasd_exit);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c4a60badf252..693187df7601 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -29,9 +29,6 @@ struct virtio_blk
29 /* The disk structure for the kernel. */ 29 /* The disk structure for the kernel. */
30 struct gendisk *disk; 30 struct gendisk *disk;
31 31
32 /* Request tracking. */
33 struct list_head reqs;
34
35 mempool_t *pool; 32 mempool_t *pool;
36 33
37 /* Process context for config space updates */ 34 /* Process context for config space updates */
@@ -55,7 +52,6 @@ struct virtio_blk
55 52
56struct virtblk_req 53struct virtblk_req
57{ 54{
58 struct list_head list;
59 struct request *req; 55 struct request *req;
60 struct virtio_blk_outhdr out_hdr; 56 struct virtio_blk_outhdr out_hdr;
61 struct virtio_scsi_inhdr in_hdr; 57 struct virtio_scsi_inhdr in_hdr;
@@ -99,7 +95,6 @@ static void blk_done(struct virtqueue *vq)
99 } 95 }
100 96
101 __blk_end_request_all(vbr->req, error); 97 __blk_end_request_all(vbr->req, error);
102 list_del(&vbr->list);
103 mempool_free(vbr, vblk->pool); 98 mempool_free(vbr, vblk->pool);
104 } 99 }
105 /* In case queue is stopped waiting for more buffers. */ 100 /* In case queue is stopped waiting for more buffers. */
@@ -184,7 +179,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
184 return false; 179 return false;
185 } 180 }
186 181
187 list_add_tail(&vbr->list, &vblk->reqs);
188 return true; 182 return true;
189} 183}
190 184
@@ -351,6 +345,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
351 cap_str_10, cap_str_2); 345 cap_str_10, cap_str_2);
352 346
353 set_capacity(vblk->disk, capacity); 347 set_capacity(vblk->disk, capacity);
348 revalidate_disk(vblk->disk);
354done: 349done:
355 mutex_unlock(&vblk->config_lock); 350 mutex_unlock(&vblk->config_lock);
356} 351}
@@ -374,6 +369,34 @@ static int init_vq(struct virtio_blk *vblk)
374 return err; 369 return err;
375} 370}
376 371
372/*
373 * Legacy naming scheme used for virtio devices. We are stuck with it for
374 * virtio blk but don't ever use it for any new driver.
375 */
376static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
377{
378 const int base = 'z' - 'a' + 1;
379 char *begin = buf + strlen(prefix);
380 char *end = buf + buflen;
381 char *p;
382 int unit;
383
384 p = end - 1;
385 *p = '\0';
386 unit = base;
387 do {
388 if (p == begin)
389 return -EINVAL;
390 *--p = 'a' + (index % unit);
391 index = (index / unit) - 1;
392 } while (index >= 0);
393
394 memmove(begin, p, end - p);
395 memcpy(buf, prefix, strlen(prefix));
396
397 return 0;
398}
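
The virtblk_name_format() helper added here implements the "bijective"
base-26 scheme: vda..vdz, then vdaa..vdzz, then vdaaa and so on. A userspace
sketch (the function body reproduced so it can run outside the kernel;
-EINVAL is replaced by -1 only to avoid the kernel headers):

#include <stdio.h>
#include <string.h>

static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -1;	/* -EINVAL in the kernel */
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));
	return 0;
}

int main(void)
{
	int samples[] = { 0, 25, 26, 701, 702 };
	char name[32];
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		virtblk_name_format("vd", samples[i], name, sizeof(name));
		printf("index %4d -> %s\n", samples[i], name);
	}
	return 0;
}

Index 0 maps to vda, 25 to vdz, 26 to vdaa and 702 to vdaaa, which is why the
three-branch sprintf() chain removed later in this hunk is no longer needed.
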
399
377static int __devinit virtblk_probe(struct virtio_device *vdev) 400static int __devinit virtblk_probe(struct virtio_device *vdev)
378{ 401{
379 struct virtio_blk *vblk; 402 struct virtio_blk *vblk;
@@ -408,7 +431,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
408 goto out_free_index; 431 goto out_free_index;
409 } 432 }
410 433
411 INIT_LIST_HEAD(&vblk->reqs);
412 spin_lock_init(&vblk->lock); 434 spin_lock_init(&vblk->lock);
413 vblk->vdev = vdev; 435 vblk->vdev = vdev;
414 vblk->sg_elems = sg_elems; 436 vblk->sg_elems = sg_elems;
@@ -442,18 +464,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
442 464
443 q->queuedata = vblk; 465 q->queuedata = vblk;
444 466
445 if (index < 26) { 467 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
446 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
447 } else if (index < (26 + 1) * 26) {
448 sprintf(vblk->disk->disk_name, "vd%c%c",
449 'a' + index / 26 - 1, 'a' + index % 26);
450 } else {
451 const unsigned int m1 = (index / 26 - 1) / 26 - 1;
452 const unsigned int m2 = (index / 26 - 1) % 26;
453 const unsigned int m3 = index % 26;
454 sprintf(vblk->disk->disk_name, "vd%c%c%c",
455 'a' + m1, 'a' + m2, 'a' + m3);
456 }
457 468
458 vblk->disk->major = major; 469 vblk->disk->major = major;
459 vblk->disk->first_minor = index_to_minor(index); 470 vblk->disk->first_minor = index_to_minor(index);
@@ -565,21 +576,29 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
565{ 576{
566 struct virtio_blk *vblk = vdev->priv; 577 struct virtio_blk *vblk = vdev->priv;
567 int index = vblk->index; 578 int index = vblk->index;
579 struct virtblk_req *vbr;
580 unsigned long flags;
568 581
569 /* Prevent config work handler from accessing the device. */ 582 /* Prevent config work handler from accessing the device. */
570 mutex_lock(&vblk->config_lock); 583 mutex_lock(&vblk->config_lock);
571 vblk->config_enable = false; 584 vblk->config_enable = false;
572 mutex_unlock(&vblk->config_lock); 585 mutex_unlock(&vblk->config_lock);
573 586
574 /* Nothing should be pending. */
575 BUG_ON(!list_empty(&vblk->reqs));
576
577 /* Stop all the virtqueues. */ 587 /* Stop all the virtqueues. */
578 vdev->config->reset(vdev); 588 vdev->config->reset(vdev);
579 589
580 flush_work(&vblk->config_work); 590 flush_work(&vblk->config_work);
581 591
582 del_gendisk(vblk->disk); 592 del_gendisk(vblk->disk);
593
594 /* Abort requests dispatched to driver. */
595 spin_lock_irqsave(&vblk->lock, flags);
596 while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
597 __blk_end_request_all(vbr->req, -EIO);
598 mempool_free(vbr, vblk->pool);
599 }
600 spin_unlock_irqrestore(&vblk->lock, flags);
601
583 blk_cleanup_queue(vblk->disk->queue); 602 blk_cleanup_queue(vblk->disk->queue);
584 put_disk(vblk->disk); 603 put_disk(vblk->disk);
585 mempool_destroy(vblk->pool); 604 mempool_destroy(vblk->pool);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 51a972704db5..ff540520bada 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -52,7 +52,6 @@
52#include <linux/io.h> 52#include <linux/io.h>
53#include <linux/gfp.h> 53#include <linux/gfp.h>
54 54
55#include <asm/system.h>
56#include <asm/uaccess.h> 55#include <asm/uaccess.h>
57#include <asm/dma.h> 56#include <asm/dma.h>
58 57
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf60f368..73f196ca713f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -321,6 +321,7 @@ struct seg_buf {
321static void xen_blkbk_unmap(struct pending_req *req) 321static void xen_blkbk_unmap(struct pending_req *req)
322{ 322{
323 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 323 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
324 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
324 unsigned int i, invcount = 0; 325 unsigned int i, invcount = 0;
325 grant_handle_t handle; 326 grant_handle_t handle;
326 int ret; 327 int ret;
@@ -332,25 +333,12 @@ static void xen_blkbk_unmap(struct pending_req *req)
332 gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), 333 gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
333 GNTMAP_host_map, handle); 334 GNTMAP_host_map, handle);
334 pending_handle(req, i) = BLKBACK_INVALID_HANDLE; 335 pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
336 pages[invcount] = virt_to_page(vaddr(req, i));
335 invcount++; 337 invcount++;
336 } 338 }
337 339
338 ret = HYPERVISOR_grant_table_op( 340 ret = gnttab_unmap_refs(unmap, pages, invcount, false);
339 GNTTABOP_unmap_grant_ref, unmap, invcount);
340 BUG_ON(ret); 341 BUG_ON(ret);
341 /*
342 * Note, we use invcount, so nr->pages, so we can't index
343 * using vaddr(req, i).
344 */
345 for (i = 0; i < invcount; i++) {
346 ret = m2p_remove_override(
347 virt_to_page(unmap[i].host_addr), false);
348 if (ret) {
349 pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
350 (unsigned long)unmap[i].host_addr);
351 continue;
352 }
353 }
354} 342}
355 343
356static int xen_blkbk_map(struct blkif_request *req, 344static int xen_blkbk_map(struct blkif_request *req,
@@ -378,7 +366,7 @@ static int xen_blkbk_map(struct blkif_request *req,
378 pending_req->blkif->domid); 366 pending_req->blkif->domid);
379 } 367 }
380 368
381 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); 369 ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
382 BUG_ON(ret); 370 BUG_ON(ret);
383 371
384 /* 372 /*
@@ -398,15 +386,6 @@ static int xen_blkbk_map(struct blkif_request *req,
398 if (ret) 386 if (ret)
399 continue; 387 continue;
400 388
401 ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
402 blkbk->pending_page(pending_req, i), NULL);
403 if (ret) {
404 pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
405 (unsigned long)map[i].dev_bus_addr, ret);
406 /* We could switch over to GNTTABOP_copy */
407 continue;
408 }
409
410 seg[i].buf = map[i].dev_bus_addr | 389 seg[i].buf = map[i].dev_bus_addr |
411 (req->u.rw.seg[i].first_sect << 9); 390 (req->u.rw.seg[i].first_sect << 9);
412 } 391 }
@@ -419,21 +398,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
419 int err = 0; 398 int err = 0;
420 int status = BLKIF_RSP_OKAY; 399 int status = BLKIF_RSP_OKAY;
421 struct block_device *bdev = blkif->vbd.bdev; 400 struct block_device *bdev = blkif->vbd.bdev;
401 unsigned long secure;
422 402
423 blkif->st_ds_req++; 403 blkif->st_ds_req++;
424 404
425 xen_blkif_get(blkif); 405 xen_blkif_get(blkif);
426 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || 406 secure = (blkif->vbd.discard_secure &&
427 blkif->blk_backend_type == BLKIF_BACKEND_FILE) { 407 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
428 unsigned long secure = (blkif->vbd.discard_secure && 408 BLKDEV_DISCARD_SECURE : 0;
429 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? 409
430 BLKDEV_DISCARD_SECURE : 0; 410 err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
431 err = blkdev_issue_discard(bdev, 411 req->u.discard.nr_sectors,
432 req->u.discard.sector_number, 412 GFP_KERNEL, secure);
433 req->u.discard.nr_sectors,
434 GFP_KERNEL, secure);
435 } else
436 err = -EOPNOTSUPP;
437 413
438 if (err == -EOPNOTSUPP) { 414 if (err == -EOPNOTSUPP) {
439 pr_debug(DRV_PFX "discard op failed, not supported\n"); 415 pr_debug(DRV_PFX "discard op failed, not supported\n");
@@ -830,7 +806,7 @@ static int __init xen_blkif_init(void)
830 int i, mmap_pages; 806 int i, mmap_pages;
831 int rc = 0; 807 int rc = 0;
832 808
833 if (!xen_pv_domain()) 809 if (!xen_domain())
834 return -ENODEV; 810 return -ENODEV;
835 811
836 blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); 812 blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7edc9be8..773cf27dc23f 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -146,11 +146,6 @@ enum blkif_protocol {
146 BLKIF_PROTOCOL_X86_64 = 3, 146 BLKIF_PROTOCOL_X86_64 = 3,
147}; 147};
148 148
149enum blkif_backend_type {
150 BLKIF_BACKEND_PHY = 1,
151 BLKIF_BACKEND_FILE = 2,
152};
153
154struct xen_vbd { 149struct xen_vbd {
155 /* What the domain refers to this vbd as. */ 150 /* What the domain refers to this vbd as. */
156 blkif_vdev_t handle; 151 blkif_vdev_t handle;
@@ -177,7 +172,6 @@ struct xen_blkif {
177 unsigned int irq; 172 unsigned int irq;
178 /* Comms information. */ 173 /* Comms information. */
179 enum blkif_protocol blk_protocol; 174 enum blkif_protocol blk_protocol;
180 enum blkif_backend_type blk_backend_type;
181 union blkif_back_rings blk_rings; 175 union blkif_back_rings blk_rings;
182 void *blk_ring; 176 void *blk_ring;
183 /* The VBD attached to this interface. */ 177 /* The VBD attached to this interface. */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb57e5d0..4f66171c6683 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -381,72 +381,49 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
381 err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", 381 err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
382 "%d", state); 382 "%d", state);
383 if (err) 383 if (err)
384 xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); 384 dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
385 385
386 return err; 386 return err;
387} 387}
388 388
389int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) 389static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
390{ 390{
391 struct xenbus_device *dev = be->dev; 391 struct xenbus_device *dev = be->dev;
392 struct xen_blkif *blkif = be->blkif; 392 struct xen_blkif *blkif = be->blkif;
393 char *type;
394 int err; 393 int err;
395 int state = 0; 394 int state = 0;
395 struct block_device *bdev = be->blkif->vbd.bdev;
396 struct request_queue *q = bdev_get_queue(bdev);
396 397
397 type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); 398 if (blk_queue_discard(q)) {
398 if (!IS_ERR(type)) { 399 err = xenbus_printf(xbt, dev->nodename,
399 if (strncmp(type, "file", 4) == 0) { 400 "discard-granularity", "%u",
400 state = 1; 401 q->limits.discard_granularity);
401 blkif->blk_backend_type = BLKIF_BACKEND_FILE; 402 if (err) {
403 dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
404 return;
402 } 405 }
403 if (strncmp(type, "phy", 3) == 0) { 406 err = xenbus_printf(xbt, dev->nodename,
404 struct block_device *bdev = be->blkif->vbd.bdev; 407 "discard-alignment", "%u",
405 struct request_queue *q = bdev_get_queue(bdev); 408 q->limits.discard_alignment);
406 if (blk_queue_discard(q)) { 409 if (err) {
407 err = xenbus_printf(xbt, dev->nodename, 410 dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
408 "discard-granularity", "%u", 411 return;
409 q->limits.discard_granularity); 412 }
410 if (err) { 413 state = 1;
411 xenbus_dev_fatal(dev, err, 414 /* Optional. */
412 "writing discard-granularity"); 415 err = xenbus_printf(xbt, dev->nodename,
413 goto kfree; 416 "discard-secure", "%d",
414 } 417 blkif->vbd.discard_secure);
415 err = xenbus_printf(xbt, dev->nodename, 418 if (err) {
416 "discard-alignment", "%u", 419 dev_warn(&dev->dev, "writing discard-secure (%d)", err);
417 q->limits.discard_alignment); 420 return;
418 if (err) {
419 xenbus_dev_fatal(dev, err,
420 "writing discard-alignment");
421 goto kfree;
422 }
423 state = 1;
424 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
425 }
426 /* Optional. */
427 err = xenbus_printf(xbt, dev->nodename,
428 "discard-secure", "%d",
429 blkif->vbd.discard_secure);
430 if (err) {
431 xenbus_dev_fatal(dev, err,
432 "writting discard-secure");
433 goto kfree;
434 }
435 } 421 }
436 } else {
437 err = PTR_ERR(type);
438 xenbus_dev_fatal(dev, err, "reading type");
439 goto out;
440 } 422 }
441
442 err = xenbus_printf(xbt, dev->nodename, "feature-discard", 423 err = xenbus_printf(xbt, dev->nodename, "feature-discard",
443 "%d", state); 424 "%d", state);
444 if (err) 425 if (err)
445 xenbus_dev_fatal(dev, err, "writing feature-discard"); 426 dev_warn(&dev->dev, "writing feature-discard (%d)", err);
446kfree:
447 kfree(type);
448out:
449 return err;
450} 427}
451int xen_blkbk_barrier(struct xenbus_transaction xbt, 428int xen_blkbk_barrier(struct xenbus_transaction xbt,
452 struct backend_info *be, int state) 429 struct backend_info *be, int state)
@@ -457,7 +434,7 @@ int xen_blkbk_barrier(struct xenbus_transaction xbt,
457 err = xenbus_printf(xbt, dev->nodename, "feature-barrier", 434 err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
458 "%d", state); 435 "%d", state);
459 if (err) 436 if (err)
460 xenbus_dev_fatal(dev, err, "writing feature-barrier"); 437 dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
461 438
462 return err; 439 return err;
463} 440}
@@ -689,14 +666,12 @@ again:
689 return; 666 return;
690 } 667 }
691 668
692 err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); 669 /* If we can't advertise it is OK. */
693 if (err) 670 xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
694 goto abort;
695 671
696 err = xen_blkbk_discard(xbt, be); 672 xen_blkbk_discard(xbt, be);
697 673
698 /* If we can't advertise it is OK. */ 674 xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
699 err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
700 675
701 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", 676 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
702 (unsigned long long)vbd_sz(&be->blkif->vbd)); 677 (unsigned long long)vbd_sz(&be->blkif->vbd));
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874c0a37..60eed4bdd2e4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/mutex.h> 44#include <linux/mutex.h>
45#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
46#include <linux/bitmap.h>
46 47
47#include <xen/xen.h> 48#include <xen/xen.h>
48#include <xen/xenbus.h> 49#include <xen/xenbus.h>
@@ -81,6 +82,7 @@ static const struct block_device_operations xlvbd_block_fops;
81 */ 82 */
82struct blkfront_info 83struct blkfront_info
83{ 84{
85 spinlock_t io_lock;
84 struct mutex mutex; 86 struct mutex mutex;
85 struct xenbus_device *xbdev; 87 struct xenbus_device *xbdev;
86 struct gendisk *gd; 88 struct gendisk *gd;
@@ -105,8 +107,6 @@ struct blkfront_info
105 int is_ready; 107 int is_ready;
106}; 108};
107 109
108static DEFINE_SPINLOCK(blkif_io_lock);
109
110static unsigned int nr_minors; 110static unsigned int nr_minors;
111static unsigned long *minors; 111static unsigned long *minors;
112static DEFINE_SPINLOCK(minor_lock); 112static DEFINE_SPINLOCK(minor_lock);
@@ -177,8 +177,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
177 177
178 spin_lock(&minor_lock); 178 spin_lock(&minor_lock);
179 if (find_next_bit(minors, end, minor) >= end) { 179 if (find_next_bit(minors, end, minor) >= end) {
180 for (; minor < end; ++minor) 180 bitmap_set(minors, minor, nr);
181 __set_bit(minor, minors);
182 rc = 0; 181 rc = 0;
183 } else 182 } else
184 rc = -EBUSY; 183 rc = -EBUSY;
@@ -193,8 +192,7 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr)
193 192
194 BUG_ON(end > nr_minors); 193 BUG_ON(end > nr_minors);
195 spin_lock(&minor_lock); 194 spin_lock(&minor_lock);
196 for (; minor < end; ++minor) 195 bitmap_clear(minors, minor, nr);
197 __clear_bit(minor, minors);
198 spin_unlock(&minor_lock); 196 spin_unlock(&minor_lock);
199} 197}
200 198
@@ -419,7 +417,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
419 struct request_queue *rq; 417 struct request_queue *rq;
420 struct blkfront_info *info = gd->private_data; 418 struct blkfront_info *info = gd->private_data;
421 419
422 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 420 rq = blk_init_queue(do_blkif_request, &info->io_lock);
423 if (rq == NULL) 421 if (rq == NULL)
424 return -1; 422 return -1;
425 423
@@ -528,6 +526,14 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
528 return 0; 526 return 0;
529} 527}
530 528
529static char *encode_disk_name(char *ptr, unsigned int n)
530{
531 if (n >= 26)
532 ptr = encode_disk_name(ptr, n / 26 - 1);
533 *ptr = 'a' + n % 26;
534 return ptr + 1;
535}
536
531static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 537static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
532 struct blkfront_info *info, 538 struct blkfront_info *info,
533 u16 vdisk_info, u16 sector_size) 539 u16 vdisk_info, u16 sector_size)
@@ -538,6 +544,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
538 unsigned int offset; 544 unsigned int offset;
539 int minor; 545 int minor;
540 int nr_parts; 546 int nr_parts;
547 char *ptr;
541 548
542 BUG_ON(info->gd != NULL); 549 BUG_ON(info->gd != NULL);
543 BUG_ON(info->rq != NULL); 550 BUG_ON(info->rq != NULL);
@@ -562,7 +569,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
562 "emulated IDE disks,\n\t choose an xvd device name" 569 "emulated IDE disks,\n\t choose an xvd device name"
563 "from xvde on\n", info->vdevice); 570 "from xvde on\n", info->vdevice);
564 } 571 }
565 err = -ENODEV; 572 if (minor >> MINORBITS) {
573 pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
574 info->vdevice, minor);
575 return -ENODEV;
576 }
566 577
567 if ((minor % nr_parts) == 0) 578 if ((minor % nr_parts) == 0)
568 nr_minors = nr_parts; 579 nr_minors = nr_parts;
@@ -576,23 +587,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
576 if (gd == NULL) 587 if (gd == NULL)
577 goto release; 588 goto release;
578 589
579 if (nr_minors > 1) { 590 strcpy(gd->disk_name, DEV_NAME);
580 if (offset < 26) 591 ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
581 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 592 BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
582 else 593 if (nr_minors > 1)
583 sprintf(gd->disk_name, "%s%c%c", DEV_NAME, 594 *ptr = 0;
584 'a' + ((offset / 26)-1), 'a' + (offset % 26)); 595 else
585 } else { 596 snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
586 if (offset < 26) 597 "%d", minor & (nr_parts - 1));
587 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
588 'a' + offset,
589 minor & (nr_parts - 1));
590 else
591 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
592 'a' + ((offset / 26) - 1),
593 'a' + (offset % 26),
594 minor & (nr_parts - 1));
595 }
596 598
597 gd->major = XENVBD_MAJOR; 599 gd->major = XENVBD_MAJOR;
598 gd->first_minor = minor; 600 gd->first_minor = minor;
@@ -636,14 +638,14 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
636 if (info->rq == NULL) 638 if (info->rq == NULL)
637 return; 639 return;
638 640
639 spin_lock_irqsave(&blkif_io_lock, flags); 641 spin_lock_irqsave(&info->io_lock, flags);
640 642
641 /* No more blkif_request(). */ 643 /* No more blkif_request(). */
642 blk_stop_queue(info->rq); 644 blk_stop_queue(info->rq);
643 645
644 /* No more gnttab callback work. */ 646 /* No more gnttab callback work. */
645 gnttab_cancel_free_callback(&info->callback); 647 gnttab_cancel_free_callback(&info->callback);
646 spin_unlock_irqrestore(&blkif_io_lock, flags); 648 spin_unlock_irqrestore(&info->io_lock, flags);
647 649
648 /* Flush gnttab callback work. Must be done with no locks held. */ 650 /* Flush gnttab callback work. Must be done with no locks held. */
649 flush_work_sync(&info->work); 651 flush_work_sync(&info->work);
@@ -675,16 +677,16 @@ static void blkif_restart_queue(struct work_struct *work)
675{ 677{
676 struct blkfront_info *info = container_of(work, struct blkfront_info, work); 678 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
677 679
678 spin_lock_irq(&blkif_io_lock); 680 spin_lock_irq(&info->io_lock);
679 if (info->connected == BLKIF_STATE_CONNECTED) 681 if (info->connected == BLKIF_STATE_CONNECTED)
680 kick_pending_request_queues(info); 682 kick_pending_request_queues(info);
681 spin_unlock_irq(&blkif_io_lock); 683 spin_unlock_irq(&info->io_lock);
682} 684}
683 685
684static void blkif_free(struct blkfront_info *info, int suspend) 686static void blkif_free(struct blkfront_info *info, int suspend)
685{ 687{
686 /* Prevent new requests being issued until we fix things up. */ 688 /* Prevent new requests being issued until we fix things up. */
687 spin_lock_irq(&blkif_io_lock); 689 spin_lock_irq(&info->io_lock);
688 info->connected = suspend ? 690 info->connected = suspend ?
689 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 691 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
690 /* No more blkif_request(). */ 692 /* No more blkif_request(). */
@@ -692,7 +694,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
692 blk_stop_queue(info->rq); 694 blk_stop_queue(info->rq);
693 /* No more gnttab callback work. */ 695 /* No more gnttab callback work. */
694 gnttab_cancel_free_callback(&info->callback); 696 gnttab_cancel_free_callback(&info->callback);
695 spin_unlock_irq(&blkif_io_lock); 697 spin_unlock_irq(&info->io_lock);
696 698
697 /* Flush gnttab callback work. Must be done with no locks held. */ 699 /* Flush gnttab callback work. Must be done with no locks held. */
698 flush_work_sync(&info->work); 700 flush_work_sync(&info->work);
@@ -728,10 +730,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
728 struct blkfront_info *info = (struct blkfront_info *)dev_id; 730 struct blkfront_info *info = (struct blkfront_info *)dev_id;
729 int error; 731 int error;
730 732
731 spin_lock_irqsave(&blkif_io_lock, flags); 733 spin_lock_irqsave(&info->io_lock, flags);
732 734
733 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 735 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
734 spin_unlock_irqrestore(&blkif_io_lock, flags); 736 spin_unlock_irqrestore(&info->io_lock, flags);
735 return IRQ_HANDLED; 737 return IRQ_HANDLED;
736 } 738 }
737 739
@@ -816,7 +818,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
816 818
817 kick_pending_request_queues(info); 819 kick_pending_request_queues(info);
818 820
819 spin_unlock_irqrestore(&blkif_io_lock, flags); 821 spin_unlock_irqrestore(&info->io_lock, flags);
820 822
821 return IRQ_HANDLED; 823 return IRQ_HANDLED;
822} 824}
@@ -991,6 +993,7 @@ static int blkfront_probe(struct xenbus_device *dev,
991 } 993 }
992 994
993 mutex_init(&info->mutex); 995 mutex_init(&info->mutex);
996 spin_lock_init(&info->io_lock);
994 info->xbdev = dev; 997 info->xbdev = dev;
995 info->vdevice = vdevice; 998 info->vdevice = vdevice;
996 info->connected = BLKIF_STATE_DISCONNECTED; 999 info->connected = BLKIF_STATE_DISCONNECTED;
@@ -1068,7 +1071,7 @@ static int blkif_recover(struct blkfront_info *info)
1068 1071
1069 xenbus_switch_state(info->xbdev, XenbusStateConnected); 1072 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1070 1073
1071 spin_lock_irq(&blkif_io_lock); 1074 spin_lock_irq(&info->io_lock);
1072 1075
1073 /* Now safe for us to use the shared ring */ 1076 /* Now safe for us to use the shared ring */
1074 info->connected = BLKIF_STATE_CONNECTED; 1077 info->connected = BLKIF_STATE_CONNECTED;
@@ -1079,7 +1082,7 @@ static int blkif_recover(struct blkfront_info *info)
1079 /* Kick any other new requests queued since we resumed */ 1082 /* Kick any other new requests queued since we resumed */
1080 kick_pending_request_queues(info); 1083 kick_pending_request_queues(info);
1081 1084
1082 spin_unlock_irq(&blkif_io_lock); 1085 spin_unlock_irq(&info->io_lock);
1083 1086
1084 return 0; 1087 return 0;
1085} 1088}
@@ -1277,10 +1280,10 @@ static void blkfront_connect(struct blkfront_info *info)
1277 xenbus_switch_state(info->xbdev, XenbusStateConnected); 1280 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1278 1281
1279 /* Kick pending requests. */ 1282 /* Kick pending requests. */
1280 spin_lock_irq(&blkif_io_lock); 1283 spin_lock_irq(&info->io_lock);
1281 info->connected = BLKIF_STATE_CONNECTED; 1284 info->connected = BLKIF_STATE_CONNECTED;
1282 kick_pending_request_queues(info); 1285 kick_pending_request_queues(info);
1283 spin_unlock_irq(&blkif_io_lock); 1286 spin_unlock_irq(&info->io_lock);
1284 1287
1285 add_disk(info->gd); 1288 add_disk(info->gd);
1286 1289
@@ -1410,7 +1413,6 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
1410 mutex_lock(&blkfront_mutex); 1413 mutex_lock(&blkfront_mutex);
1411 1414
1412 bdev = bdget_disk(disk, 0); 1415 bdev = bdget_disk(disk, 0);
1413 bdput(bdev);
1414 1416
1415 if (bdev->bd_openers) 1417 if (bdev->bd_openers)
1416 goto out; 1418 goto out;
@@ -1441,6 +1443,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
1441 } 1443 }
1442 1444
1443out: 1445out:
1446 bdput(bdev);
1444 mutex_unlock(&blkfront_mutex); 1447 mutex_unlock(&blkfront_mutex);
1445 return 0; 1448 return 0;
1446} 1449}
@@ -1475,6 +1478,9 @@ static int __init xlblk_init(void)
1475 if (!xen_domain()) 1478 if (!xen_domain())
1476 return -ENODEV; 1479 return -ENODEV;
1477 1480
1481 if (xen_hvm_domain() && !xen_platform_pci_unplug)
1482 return -ENODEV;
1483
1478 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 1484 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1479 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", 1485 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1480 XENVBD_MAJOR, DEV_NAME); 1486 XENVBD_MAJOR, DEV_NAME);
@@ -1494,7 +1500,9 @@ module_init(xlblk_init);
1494 1500
1495static void __exit xlblk_exit(void) 1501static void __exit xlblk_exit(void)
1496{ 1502{
1497 return xenbus_unregister_driver(&blkfront_driver); 1503 xenbus_unregister_driver(&blkfront_driver);
1504 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1505 kfree(minors);
1498} 1506}
1499module_exit(xlblk_exit); 1507module_exit(xlblk_exit);
1500 1508